1 #ifdef CONFIG_CPU_SUP_INTEL
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS		4

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

/*
 * BTS buffer: 16 pages. PEBS buffer: one page is plenty since the
 * interrupt threshold is programmed at a single record (see
 * reserve_ds_buffers()).
 */
#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE	PAGE_SIZE
/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bc, cx, dx;
	u32 si, di, sp, bp;
};

 */
23 struct pebs_record_core
{
28 u64 r12
, r13
, r14
, r15
;
31 struct pebs_record_nhm
{
36 u64 r12
, r13
, r14
, r15
;
37 u64 status
, dla
, dse
, lat
;
41 * A debug store configuration.
43 * We only support architectures that use 64bit fields.
48 u64 bts_absolute_maximum
;
49 u64 bts_interrupt_threshold
;
52 u64 pebs_absolute_maximum
;
53 u64 pebs_interrupt_threshold
;
54 u64 pebs_event_reset
[MAX_PEBS_EVENTS
];
57 static void init_debug_store_on_cpu(int cpu
)
59 struct debug_store
*ds
= per_cpu(cpu_hw_events
, cpu
).ds
;
64 wrmsr_on_cpu(cpu
, MSR_IA32_DS_AREA
,
65 (u32
)((u64
)(unsigned long)ds
),
66 (u32
)((u64
)(unsigned long)ds
>> 32));
69 static void fini_debug_store_on_cpu(int cpu
)
71 if (!per_cpu(cpu_hw_events
, cpu
).ds
)
74 wrmsr_on_cpu(cpu
, MSR_IA32_DS_AREA
, 0, 0);
77 static void release_ds_buffers(void)
81 if (!x86_pmu
.bts
&& !x86_pmu
.pebs
)
86 for_each_online_cpu(cpu
)
87 fini_debug_store_on_cpu(cpu
);
89 for_each_possible_cpu(cpu
) {
90 struct debug_store
*ds
= per_cpu(cpu_hw_events
, cpu
).ds
;
95 per_cpu(cpu_hw_events
, cpu
).ds
= NULL
;
97 kfree((void *)(unsigned long)ds
->pebs_buffer_base
);
98 kfree((void *)(unsigned long)ds
->bts_buffer_base
);
105 static int reserve_ds_buffers(void)
109 if (!x86_pmu
.bts
&& !x86_pmu
.pebs
)
114 for_each_possible_cpu(cpu
) {
115 struct debug_store
*ds
;
120 ds
= kzalloc(sizeof(*ds
), GFP_KERNEL
);
123 per_cpu(cpu_hw_events
, cpu
).ds
= ds
;
126 buffer
= kzalloc(BTS_BUFFER_SIZE
, GFP_KERNEL
);
127 if (unlikely(!buffer
))
130 max
= BTS_BUFFER_SIZE
/ BTS_RECORD_SIZE
;
133 ds
->bts_buffer_base
= (u64
)(unsigned long)buffer
;
134 ds
->bts_index
= ds
->bts_buffer_base
;
135 ds
->bts_absolute_maximum
= ds
->bts_buffer_base
+
136 max
* BTS_RECORD_SIZE
;
137 ds
->bts_interrupt_threshold
= ds
->bts_absolute_maximum
-
138 thresh
* BTS_RECORD_SIZE
;
142 buffer
= kzalloc(PEBS_BUFFER_SIZE
, GFP_KERNEL
);
143 if (unlikely(!buffer
))
146 max
= PEBS_BUFFER_SIZE
/ x86_pmu
.pebs_record_size
;
148 ds
->pebs_buffer_base
= (u64
)(unsigned long)buffer
;
149 ds
->pebs_index
= ds
->pebs_buffer_base
;
150 ds
->pebs_absolute_maximum
= ds
->pebs_buffer_base
+
151 max
* x86_pmu
.pebs_record_size
;
153 * Always use single record PEBS
155 ds
->pebs_interrupt_threshold
= ds
->pebs_buffer_base
+
156 x86_pmu
.pebs_record_size
;
163 release_ds_buffers();
165 for_each_online_cpu(cpu
)
166 init_debug_store_on_cpu(cpu
);
178 static struct event_constraint bts_constraint
=
179 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS
, 0);
181 static void intel_pmu_enable_bts(u64 config
)
183 unsigned long debugctlmsr
;
185 debugctlmsr
= get_debugctlmsr();
187 debugctlmsr
|= DEBUGCTLMSR_TR
;
188 debugctlmsr
|= DEBUGCTLMSR_BTS
;
189 debugctlmsr
|= DEBUGCTLMSR_BTINT
;
191 if (!(config
& ARCH_PERFMON_EVENTSEL_OS
))
192 debugctlmsr
|= DEBUGCTLMSR_BTS_OFF_OS
;
194 if (!(config
& ARCH_PERFMON_EVENTSEL_USR
))
195 debugctlmsr
|= DEBUGCTLMSR_BTS_OFF_USR
;
197 update_debugctlmsr(debugctlmsr
);
200 static void intel_pmu_disable_bts(void)
202 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
203 unsigned long debugctlmsr
;
208 debugctlmsr
= get_debugctlmsr();
211 ~(DEBUGCTLMSR_TR
| DEBUGCTLMSR_BTS
| DEBUGCTLMSR_BTINT
|
212 DEBUGCTLMSR_BTS_OFF_OS
| DEBUGCTLMSR_BTS_OFF_USR
);
214 update_debugctlmsr(debugctlmsr
);
217 static int intel_pmu_drain_bts_buffer(void)
219 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
220 struct debug_store
*ds
= cpuc
->ds
;
226 struct perf_event
*event
= cpuc
->events
[X86_PMC_IDX_FIXED_BTS
];
227 struct bts_record
*at
, *top
;
228 struct perf_output_handle handle
;
229 struct perf_event_header header
;
230 struct perf_sample_data data
;
239 at
= (struct bts_record
*)(unsigned long)ds
->bts_buffer_base
;
240 top
= (struct bts_record
*)(unsigned long)ds
->bts_index
;
245 ds
->bts_index
= ds
->bts_buffer_base
;
247 perf_sample_data_init(&data
, 0);
248 data
.period
= event
->hw
.last_period
;
252 * Prepare a generic sample, i.e. fill in the invariant fields.
253 * We will overwrite the from and to address before we output
256 perf_prepare_sample(&header
, &data
, event
, ®s
);
258 if (perf_output_begin(&handle
, event
, header
.size
* (top
- at
), 1, 1))
261 for (; at
< top
; at
++) {
265 perf_output_sample(&handle
, &header
, &data
, event
);
268 perf_output_end(&handle
);
270 /* There's new data available. */
271 event
->hw
.interrupts
++;
272 event
->pending_kill
= POLL_IN
;
280 static struct event_constraint intel_core_pebs_events
[] = {
281 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
282 PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
283 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
284 PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
285 PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
286 PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
287 PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
288 PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
289 PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
293 static struct event_constraint intel_nehalem_pebs_events
[] = {
294 PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
295 PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
296 PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
297 PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */
298 PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
299 PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
300 PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
301 PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
302 PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
306 static struct event_constraint
*
307 intel_pebs_constraints(struct perf_event
*event
)
309 struct event_constraint
*c
;
311 if (!event
->attr
.precise_ip
)
314 if (x86_pmu
.pebs_constraints
) {
315 for_each_event_constraint(c
, x86_pmu
.pebs_constraints
) {
316 if ((event
->hw
.config
& c
->cmask
) == c
->code
)
321 return &emptyconstraint
;
324 static void intel_pmu_pebs_enable(struct perf_event
*event
)
326 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
327 struct hw_perf_event
*hwc
= &event
->hw
;
329 hwc
->config
&= ~ARCH_PERFMON_EVENTSEL_INT
;
331 cpuc
->pebs_enabled
|= 1ULL << hwc
->idx
;
332 WARN_ON_ONCE(cpuc
->enabled
);
334 if (x86_pmu
.intel_cap
.pebs_trap
&& event
->attr
.precise_ip
> 1)
335 intel_pmu_lbr_enable(event
);
338 static void intel_pmu_pebs_disable(struct perf_event
*event
)
340 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
341 struct hw_perf_event
*hwc
= &event
->hw
;
343 cpuc
->pebs_enabled
&= ~(1ULL << hwc
->idx
);
345 wrmsrl(MSR_IA32_PEBS_ENABLE
, cpuc
->pebs_enabled
);
347 hwc
->config
|= ARCH_PERFMON_EVENTSEL_INT
;
349 if (x86_pmu
.intel_cap
.pebs_trap
&& event
->attr
.precise_ip
> 1)
350 intel_pmu_lbr_disable(event
);
353 static void intel_pmu_pebs_enable_all(void)
355 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
357 if (cpuc
->pebs_enabled
)
358 wrmsrl(MSR_IA32_PEBS_ENABLE
, cpuc
->pebs_enabled
);
361 static void intel_pmu_pebs_disable_all(void)
363 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
365 if (cpuc
->pebs_enabled
)
366 wrmsrl(MSR_IA32_PEBS_ENABLE
, 0);
369 #include <asm/insn.h>
/*
 * Is @ip a kernel-space address? On 32-bit the kernel lives above
 * PAGE_OFFSET; on 64-bit kernel addresses have the sign bit set.
 */
static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
	return ip > PAGE_OFFSET;
#else
	return (long)ip < 0;
#endif
}
380 static int intel_pmu_pebs_fixup_ip(struct pt_regs
*regs
)
382 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
383 unsigned long from
= cpuc
->lbr_entries
[0].from
;
384 unsigned long old_to
, to
= cpuc
->lbr_entries
[0].to
;
385 unsigned long ip
= regs
->ip
;
388 * We don't need to fixup if the PEBS assist is fault like
390 if (!x86_pmu
.intel_cap
.pebs_trap
)
394 * No LBR entry, no basic block, no rewinding
396 if (!cpuc
->lbr_stack
.nr
|| !from
|| !to
)
400 * Basic blocks should never cross user/kernel boundaries
402 if (kernel_ip(ip
) != kernel_ip(to
))
406 * unsigned math, either ip is before the start (impossible) or
407 * the basic block is larger than 1 page (sanity)
409 if ((ip
- to
) > PAGE_SIZE
)
413 * We sampled a branch insn, rewind using the LBR stack
422 u8 buf
[MAX_INSN_SIZE
];
426 if (!kernel_ip(ip
)) {
427 int bytes
, size
= MAX_INSN_SIZE
;
429 bytes
= copy_from_user_nmi(buf
, (void __user
*)to
, size
);
437 kernel_insn_init(&insn
, kaddr
);
438 insn_get_length(&insn
);
448 * Even though we decoded the basic block, the instruction stream
449 * never matched the given IP, either the TO or the IP got corrupted.
/* Forward declaration; defined in the generic x86 perf_event code. */
static int intel_pmu_save_and_restart(struct perf_event *event);
456 static void __intel_pmu_pebs_event(struct perf_event
*event
,
457 struct pt_regs
*iregs
, void *__pebs
)
460 * We cast to pebs_record_core since that is a subset of
461 * both formats and we don't use the other fields in this
464 struct pebs_record_core
*pebs
= __pebs
;
465 struct perf_sample_data data
;
468 if (!intel_pmu_save_and_restart(event
))
471 perf_sample_data_init(&data
, 0);
472 data
.period
= event
->hw
.last_period
;
475 * We use the interrupt regs as a base because the PEBS record
476 * does not contain a full regs set, specifically it seems to
477 * lack segment descriptors, which get used by things like
480 * In the simple case fix up only the IP and BP,SP regs, for
481 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
482 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
489 if (event
->attr
.precise_ip
> 1 && intel_pmu_pebs_fixup_ip(®s
))
490 regs
.flags
|= PERF_EFLAGS_EXACT
;
492 regs
.flags
&= ~PERF_EFLAGS_EXACT
;
494 if (perf_event_overflow(event
, 1, &data
, ®s
))
495 x86_pmu_stop(event
, 0);
498 static void intel_pmu_drain_pebs_core(struct pt_regs
*iregs
)
500 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
501 struct debug_store
*ds
= cpuc
->ds
;
502 struct perf_event
*event
= cpuc
->events
[0]; /* PMC0 only */
503 struct pebs_record_core
*at
, *top
;
506 if (!ds
|| !x86_pmu
.pebs
)
509 at
= (struct pebs_record_core
*)(unsigned long)ds
->pebs_buffer_base
;
510 top
= (struct pebs_record_core
*)(unsigned long)ds
->pebs_index
;
513 * Whatever else happens, drain the thing
515 ds
->pebs_index
= ds
->pebs_buffer_base
;
517 if (!test_bit(0, cpuc
->active_mask
))
520 WARN_ON_ONCE(!event
);
522 if (!event
->attr
.precise_ip
)
530 * Should not happen, we program the threshold at 1 and do not
536 __intel_pmu_pebs_event(event
, iregs
, at
);
539 static void intel_pmu_drain_pebs_nhm(struct pt_regs
*iregs
)
541 struct cpu_hw_events
*cpuc
= &__get_cpu_var(cpu_hw_events
);
542 struct debug_store
*ds
= cpuc
->ds
;
543 struct pebs_record_nhm
*at
, *top
;
544 struct perf_event
*event
= NULL
;
548 if (!ds
|| !x86_pmu
.pebs
)
551 at
= (struct pebs_record_nhm
*)(unsigned long)ds
->pebs_buffer_base
;
552 top
= (struct pebs_record_nhm
*)(unsigned long)ds
->pebs_index
;
554 ds
->pebs_index
= ds
->pebs_buffer_base
;
561 * Should not happen, we program the threshold at 1 and do not
564 WARN_ON_ONCE(n
> MAX_PEBS_EVENTS
);
566 for ( ; at
< top
; at
++) {
567 for_each_set_bit(bit
, (unsigned long *)&at
->status
, MAX_PEBS_EVENTS
) {
568 event
= cpuc
->events
[bit
];
569 if (!test_bit(bit
, cpuc
->active_mask
))
572 WARN_ON_ONCE(!event
);
574 if (!event
->attr
.precise_ip
)
577 if (__test_and_set_bit(bit
, (unsigned long *)&status
))
583 if (!event
|| bit
>= MAX_PEBS_EVENTS
)
586 __intel_pmu_pebs_event(event
, iregs
, at
);
591 * BTS, PEBS probe and setup
594 static void intel_ds_init(void)
597 * No support for 32bit formats
599 if (!boot_cpu_has(X86_FEATURE_DTES64
))
602 x86_pmu
.bts
= boot_cpu_has(X86_FEATURE_BTS
);
603 x86_pmu
.pebs
= boot_cpu_has(X86_FEATURE_PEBS
);
605 char pebs_type
= x86_pmu
.intel_cap
.pebs_trap
? '+' : '-';
606 int format
= x86_pmu
.intel_cap
.pebs_format
;
610 printk(KERN_CONT
"PEBS fmt0%c, ", pebs_type
);
611 x86_pmu
.pebs_record_size
= sizeof(struct pebs_record_core
);
612 x86_pmu
.drain_pebs
= intel_pmu_drain_pebs_core
;
613 x86_pmu
.pebs_constraints
= intel_core_pebs_events
;
617 printk(KERN_CONT
"PEBS fmt1%c, ", pebs_type
);
618 x86_pmu
.pebs_record_size
= sizeof(struct pebs_record_nhm
);
619 x86_pmu
.drain_pebs
= intel_pmu_drain_pebs_nhm
;
620 x86_pmu
.pebs_constraints
= intel_nehalem_pebs_events
;
624 printk(KERN_CONT
"no PEBS fmt%d%c, ", format
, pebs_type
);
631 #else /* CONFIG_CPU_SUP_INTEL */
/* Stubs when Intel CPU support is not configured. */
static int reserve_ds_buffers(void)
{
	return 0;
}

static void release_ds_buffers(void)
{
}
642 #endif /* CONFIG_CPU_SUP_INTEL */