// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM PMU support for Intel CPUs
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 */
11 #include <linux/types.h>
12 #include <linux/kvm_host.h>
13 #include <linux/perf_event.h>
14 #include <asm/perf_event.h>
/*
 * Distance between the MSR_IA32_PMC0 and MSR_IA32_PERFCTR0 counter ranges.
 * intel_pmu_set_msr() tests this bit in the MSR index to tell a write that
 * came in through the MSR_IA32_PMC0 range from one that used the
 * MSR_IA32_PERFCTR0 range.
 */
#define MSR_PMC_FULL_WIDTH_BIT	(MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
23 static struct kvm_event_hw_type_mapping intel_arch_events
[] = {
24 [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES
},
25 [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS
},
26 [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES
},
27 [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES
},
28 [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES
},
29 [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS
},
30 [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES
},
31 /* The above index must match CPUID 0x0A.EBX bit vector */
32 [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES
},
/*
 * Mapping between fixed pmc index and intel_arch_events array: entry N is
 * the intel_arch_events[] index whose event fixed counter N counts.
 */
static int fixed_pmc_events[] = {
	[0] = 1,	/* PERF_COUNT_HW_INSTRUCTIONS */
	[1] = 0,	/* PERF_COUNT_HW_CPU_CYCLES */
	[2] = 7,	/* PERF_COUNT_HW_REF_CPU_CYCLES */
};
38 static void reprogram_fixed_counters(struct kvm_pmu
*pmu
, u64 data
)
42 for (i
= 0; i
< pmu
->nr_arch_fixed_counters
; i
++) {
43 u8 new_ctrl
= fixed_ctrl_field(data
, i
);
44 u8 old_ctrl
= fixed_ctrl_field(pmu
->fixed_ctr_ctrl
, i
);
47 pmc
= get_fixed_pmc(pmu
, MSR_CORE_PERF_FIXED_CTR0
+ i
);
49 if (old_ctrl
== new_ctrl
)
52 __set_bit(INTEL_PMC_IDX_FIXED
+ i
, pmu
->pmc_in_use
);
53 reprogram_fixed_counter(pmc
, new_ctrl
, i
);
56 pmu
->fixed_ctr_ctrl
= data
;
59 /* function is called when global control register has been updated. */
60 static void global_ctrl_changed(struct kvm_pmu
*pmu
, u64 data
)
63 u64 diff
= pmu
->global_ctrl
^ data
;
65 pmu
->global_ctrl
= data
;
67 for_each_set_bit(bit
, (unsigned long *)&diff
, X86_PMC_IDX_MAX
)
68 reprogram_counter(pmu
, bit
);
71 static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc
*pmc
)
73 struct kvm_pmu
*pmu
= pmc_to_pmu(pmc
);
74 u8 event_select
= pmc
->eventsel
& ARCH_PERFMON_EVENTSEL_EVENT
;
75 u8 unit_mask
= (pmc
->eventsel
& ARCH_PERFMON_EVENTSEL_UMASK
) >> 8;
78 for (i
= 0; i
< ARRAY_SIZE(intel_arch_events
); i
++) {
79 if (intel_arch_events
[i
].eventsel
!= event_select
||
80 intel_arch_events
[i
].unit_mask
!= unit_mask
)
83 /* disable event that reported as not present by cpuid */
84 if ((i
< 7) && !(pmu
->available_event_types
& (1 << i
)))
85 return PERF_COUNT_HW_MAX
+ 1;
90 if (i
== ARRAY_SIZE(intel_arch_events
))
91 return PERF_COUNT_HW_MAX
;
93 return intel_arch_events
[i
].event_type
;
96 /* check if a PMC is enabled by comparing it with globl_ctrl bits. */
97 static bool intel_pmc_is_enabled(struct kvm_pmc
*pmc
)
99 struct kvm_pmu
*pmu
= pmc_to_pmu(pmc
);
101 return test_bit(pmc
->idx
, (unsigned long *)&pmu
->global_ctrl
);
104 static struct kvm_pmc
*intel_pmc_idx_to_pmc(struct kvm_pmu
*pmu
, int pmc_idx
)
106 if (pmc_idx
< INTEL_PMC_IDX_FIXED
)
107 return get_gp_pmc(pmu
, MSR_P6_EVNTSEL0
+ pmc_idx
,
110 u32 idx
= pmc_idx
- INTEL_PMC_IDX_FIXED
;
112 return get_fixed_pmc(pmu
, idx
+ MSR_CORE_PERF_FIXED_CTR0
);
116 static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu
*vcpu
, unsigned int idx
)
118 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
119 bool fixed
= idx
& (1u << 30);
123 return fixed
? idx
< pmu
->nr_arch_fixed_counters
124 : idx
< pmu
->nr_arch_gp_counters
;
127 static struct kvm_pmc
*intel_rdpmc_ecx_to_pmc(struct kvm_vcpu
*vcpu
,
128 unsigned int idx
, u64
*mask
)
130 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
131 bool fixed
= idx
& (1u << 30);
132 struct kvm_pmc
*counters
;
133 unsigned int num_counters
;
137 counters
= pmu
->fixed_counters
;
138 num_counters
= pmu
->nr_arch_fixed_counters
;
140 counters
= pmu
->gp_counters
;
141 num_counters
= pmu
->nr_arch_gp_counters
;
143 if (idx
>= num_counters
)
145 *mask
&= pmu
->counter_bitmask
[fixed
? KVM_PMC_FIXED
: KVM_PMC_GP
];
146 return &counters
[array_index_nospec(idx
, num_counters
)];
149 static inline u64
vcpu_get_perf_capabilities(struct kvm_vcpu
*vcpu
)
151 if (!guest_cpuid_has(vcpu
, X86_FEATURE_PDCM
))
154 return vcpu
->arch
.perf_capabilities
;
157 static inline bool fw_writes_is_enabled(struct kvm_vcpu
*vcpu
)
159 return (vcpu_get_perf_capabilities(vcpu
) & PMU_CAP_FW_WRITES
) != 0;
162 static inline struct kvm_pmc
*get_fw_gp_pmc(struct kvm_pmu
*pmu
, u32 msr
)
164 if (!fw_writes_is_enabled(pmu_to_vcpu(pmu
)))
167 return get_gp_pmc(pmu
, msr
, MSR_IA32_PMC0
);
170 bool intel_pmu_lbr_is_compatible(struct kvm_vcpu
*vcpu
)
173 * As a first step, a guest could only enable LBR feature if its
174 * cpu model is the same as the host because the LBR registers
175 * would be pass-through to the guest and they're model specific.
177 return boot_cpu_data
.x86_model
== guest_cpuid_model(vcpu
);
180 bool intel_pmu_lbr_is_enabled(struct kvm_vcpu
*vcpu
)
182 struct x86_pmu_lbr
*lbr
= vcpu_to_lbr_records(vcpu
);
184 return lbr
->nr
&& (vcpu_get_perf_capabilities(vcpu
) & PMU_CAP_LBR_FMT
);
187 static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu
*vcpu
, u32 index
)
189 struct x86_pmu_lbr
*records
= vcpu_to_lbr_records(vcpu
);
192 if (!intel_pmu_lbr_is_enabled(vcpu
))
195 ret
= (index
== MSR_LBR_SELECT
) || (index
== MSR_LBR_TOS
) ||
196 (index
>= records
->from
&& index
< records
->from
+ records
->nr
) ||
197 (index
>= records
->to
&& index
< records
->to
+ records
->nr
);
199 if (!ret
&& records
->info
)
200 ret
= (index
>= records
->info
&& index
< records
->info
+ records
->nr
);
205 static bool intel_is_valid_msr(struct kvm_vcpu
*vcpu
, u32 msr
)
207 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
211 case MSR_CORE_PERF_FIXED_CTR_CTRL
:
212 case MSR_CORE_PERF_GLOBAL_STATUS
:
213 case MSR_CORE_PERF_GLOBAL_CTRL
:
214 case MSR_CORE_PERF_GLOBAL_OVF_CTRL
:
215 ret
= pmu
->version
> 1;
218 ret
= get_gp_pmc(pmu
, msr
, MSR_IA32_PERFCTR0
) ||
219 get_gp_pmc(pmu
, msr
, MSR_P6_EVNTSEL0
) ||
220 get_fixed_pmc(pmu
, msr
) || get_fw_gp_pmc(pmu
, msr
) ||
221 intel_pmu_is_valid_lbr_msr(vcpu
, msr
);
228 static struct kvm_pmc
*intel_msr_idx_to_pmc(struct kvm_vcpu
*vcpu
, u32 msr
)
230 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
233 pmc
= get_fixed_pmc(pmu
, msr
);
234 pmc
= pmc
? pmc
: get_gp_pmc(pmu
, msr
, MSR_P6_EVNTSEL0
);
235 pmc
= pmc
? pmc
: get_gp_pmc(pmu
, msr
, MSR_IA32_PERFCTR0
);
240 static inline void intel_pmu_release_guest_lbr_event(struct kvm_vcpu
*vcpu
)
242 struct lbr_desc
*lbr_desc
= vcpu_to_lbr_desc(vcpu
);
244 if (lbr_desc
->event
) {
245 perf_event_release_kernel(lbr_desc
->event
);
246 lbr_desc
->event
= NULL
;
247 vcpu_to_pmu(vcpu
)->event_count
--;
251 int intel_pmu_create_guest_lbr_event(struct kvm_vcpu
*vcpu
)
253 struct lbr_desc
*lbr_desc
= vcpu_to_lbr_desc(vcpu
);
254 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
255 struct perf_event
*event
;
258 * The perf_event_attr is constructed in the minimum efficient way:
259 * - set 'pinned = true' to make it task pinned so that if another
260 * cpu pinned event reclaims LBR, the event->oncpu will be set to -1;
261 * - set '.exclude_host = true' to record guest branches behavior;
263 * - set '.config = INTEL_FIXED_VLBR_EVENT' to indicates host perf
264 * schedule the event without a real HW counter but a fake one;
265 * check is_guest_lbr_event() and __intel_get_event_constraints();
267 * - set 'sample_type = PERF_SAMPLE_BRANCH_STACK' and
268 * 'branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
269 * PERF_SAMPLE_BRANCH_USER' to configure it as a LBR callstack
270 * event, which helps KVM to save/restore guest LBR records
271 * during host context switches and reduces quite a lot overhead,
272 * check branch_user_callstack() and intel_pmu_lbr_sched_task();
274 struct perf_event_attr attr
= {
275 .type
= PERF_TYPE_RAW
,
276 .size
= sizeof(attr
),
277 .config
= INTEL_FIXED_VLBR_EVENT
,
278 .sample_type
= PERF_SAMPLE_BRANCH_STACK
,
280 .exclude_host
= true,
281 .branch_sample_type
= PERF_SAMPLE_BRANCH_CALL_STACK
|
282 PERF_SAMPLE_BRANCH_USER
,
285 if (unlikely(lbr_desc
->event
)) {
286 __set_bit(INTEL_PMC_IDX_FIXED_VLBR
, pmu
->pmc_in_use
);
290 event
= perf_event_create_kernel_counter(&attr
, -1,
291 current
, NULL
, NULL
);
293 pr_debug_ratelimited("%s: failed %ld\n",
294 __func__
, PTR_ERR(event
));
295 return PTR_ERR(event
);
297 lbr_desc
->event
= event
;
299 __set_bit(INTEL_PMC_IDX_FIXED_VLBR
, pmu
->pmc_in_use
);
304 * It's safe to access LBR msrs from guest when they have not
305 * been passthrough since the host would help restore or reset
306 * the LBR msrs records when the guest LBR event is scheduled in.
308 static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu
*vcpu
,
309 struct msr_data
*msr_info
, bool read
)
311 struct lbr_desc
*lbr_desc
= vcpu_to_lbr_desc(vcpu
);
312 u32 index
= msr_info
->index
;
314 if (!intel_pmu_is_valid_lbr_msr(vcpu
, index
))
317 if (!lbr_desc
->event
&& intel_pmu_create_guest_lbr_event(vcpu
) < 0)
321 * Disable irq to ensure the LBR feature doesn't get reclaimed by the
322 * host at the time the value is read from the msr, and this avoids the
323 * host LBR value to be leaked to the guest. If LBR has been reclaimed,
324 * return 0 on guest reads.
327 if (lbr_desc
->event
->state
== PERF_EVENT_STATE_ACTIVE
) {
329 rdmsrl(index
, msr_info
->data
);
331 wrmsrl(index
, msr_info
->data
);
332 __set_bit(INTEL_PMC_IDX_FIXED_VLBR
, vcpu_to_pmu(vcpu
)->pmc_in_use
);
336 clear_bit(INTEL_PMC_IDX_FIXED_VLBR
, vcpu_to_pmu(vcpu
)->pmc_in_use
);
345 static int intel_pmu_get_msr(struct kvm_vcpu
*vcpu
, struct msr_data
*msr_info
)
347 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
349 u32 msr
= msr_info
->index
;
352 case MSR_CORE_PERF_FIXED_CTR_CTRL
:
353 msr_info
->data
= pmu
->fixed_ctr_ctrl
;
355 case MSR_CORE_PERF_GLOBAL_STATUS
:
356 msr_info
->data
= pmu
->global_status
;
358 case MSR_CORE_PERF_GLOBAL_CTRL
:
359 msr_info
->data
= pmu
->global_ctrl
;
361 case MSR_CORE_PERF_GLOBAL_OVF_CTRL
:
365 if ((pmc
= get_gp_pmc(pmu
, msr
, MSR_IA32_PERFCTR0
)) ||
366 (pmc
= get_gp_pmc(pmu
, msr
, MSR_IA32_PMC0
))) {
367 u64 val
= pmc_read_counter(pmc
);
369 val
& pmu
->counter_bitmask
[KVM_PMC_GP
];
371 } else if ((pmc
= get_fixed_pmc(pmu
, msr
))) {
372 u64 val
= pmc_read_counter(pmc
);
374 val
& pmu
->counter_bitmask
[KVM_PMC_FIXED
];
376 } else if ((pmc
= get_gp_pmc(pmu
, msr
, MSR_P6_EVNTSEL0
))) {
377 msr_info
->data
= pmc
->eventsel
;
379 } else if (intel_pmu_handle_lbr_msrs_access(vcpu
, msr_info
, true))
386 static int intel_pmu_set_msr(struct kvm_vcpu
*vcpu
, struct msr_data
*msr_info
)
388 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
390 u32 msr
= msr_info
->index
;
391 u64 data
= msr_info
->data
;
395 case MSR_CORE_PERF_FIXED_CTR_CTRL
:
396 if (pmu
->fixed_ctr_ctrl
== data
)
398 if (!(data
& 0xfffffffffffff444ull
)) {
399 reprogram_fixed_counters(pmu
, data
);
403 case MSR_CORE_PERF_GLOBAL_STATUS
:
404 if (msr_info
->host_initiated
) {
405 pmu
->global_status
= data
;
409 case MSR_CORE_PERF_GLOBAL_CTRL
:
410 if (pmu
->global_ctrl
== data
)
412 if (kvm_valid_perf_global_ctrl(pmu
, data
)) {
413 global_ctrl_changed(pmu
, data
);
417 case MSR_CORE_PERF_GLOBAL_OVF_CTRL
:
418 if (!(data
& pmu
->global_ovf_ctrl_mask
)) {
419 if (!msr_info
->host_initiated
)
420 pmu
->global_status
&= ~data
;
425 if ((pmc
= get_gp_pmc(pmu
, msr
, MSR_IA32_PERFCTR0
)) ||
426 (pmc
= get_gp_pmc(pmu
, msr
, MSR_IA32_PMC0
))) {
427 if ((msr
& MSR_PMC_FULL_WIDTH_BIT
) &&
428 (data
& ~pmu
->counter_bitmask
[KVM_PMC_GP
]))
430 if (!msr_info
->host_initiated
&&
431 !(msr
& MSR_PMC_FULL_WIDTH_BIT
))
432 data
= (s64
)(s32
)data
;
433 pmc
->counter
+= data
- pmc_read_counter(pmc
);
434 if (pmc
->perf_event
&& !pmc
->is_paused
)
435 perf_event_period(pmc
->perf_event
,
436 get_sample_period(pmc
, data
));
438 } else if ((pmc
= get_fixed_pmc(pmu
, msr
))) {
439 pmc
->counter
+= data
- pmc_read_counter(pmc
);
440 if (pmc
->perf_event
&& !pmc
->is_paused
)
441 perf_event_period(pmc
->perf_event
,
442 get_sample_period(pmc
, data
));
444 } else if ((pmc
= get_gp_pmc(pmu
, msr
, MSR_P6_EVNTSEL0
))) {
445 if (data
== pmc
->eventsel
)
447 reserved_bits
= pmu
->reserved_bits
;
448 if ((pmc
->idx
== 2) &&
449 (pmu
->raw_event_mask
& HSW_IN_TX_CHECKPOINTED
))
450 reserved_bits
^= HSW_IN_TX_CHECKPOINTED
;
451 if (!(data
& reserved_bits
)) {
452 reprogram_gp_counter(pmc
, data
);
455 } else if (intel_pmu_handle_lbr_msrs_access(vcpu
, msr_info
, false))
462 static void setup_fixed_pmc_eventsel(struct kvm_pmu
*pmu
)
464 size_t size
= ARRAY_SIZE(fixed_pmc_events
);
469 for (i
= 0; i
< pmu
->nr_arch_fixed_counters
; i
++) {
470 pmc
= &pmu
->fixed_counters
[i
];
471 event
= fixed_pmc_events
[array_index_nospec(i
, size
)];
472 pmc
->eventsel
= (intel_arch_events
[event
].unit_mask
<< 8) |
473 intel_arch_events
[event
].eventsel
;
477 static void intel_pmu_refresh(struct kvm_vcpu
*vcpu
)
479 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
480 struct lbr_desc
*lbr_desc
= vcpu_to_lbr_desc(vcpu
);
482 struct x86_pmu_capability x86_pmu
;
483 struct kvm_cpuid_entry2
*entry
;
484 union cpuid10_eax eax
;
485 union cpuid10_edx edx
;
487 pmu
->nr_arch_gp_counters
= 0;
488 pmu
->nr_arch_fixed_counters
= 0;
489 pmu
->counter_bitmask
[KVM_PMC_GP
] = 0;
490 pmu
->counter_bitmask
[KVM_PMC_FIXED
] = 0;
492 pmu
->reserved_bits
= 0xffffffff00200000ull
;
493 pmu
->raw_event_mask
= X86_RAW_EVENT_MASK
;
495 entry
= kvm_find_cpuid_entry(vcpu
, 0xa, 0);
496 if (!entry
|| !vcpu
->kvm
->arch
.enable_pmu
)
498 eax
.full
= entry
->eax
;
499 edx
.full
= entry
->edx
;
501 pmu
->version
= eax
.split
.version_id
;
505 perf_get_x86_pmu_capability(&x86_pmu
);
507 pmu
->nr_arch_gp_counters
= min_t(int, eax
.split
.num_counters
,
508 x86_pmu
.num_counters_gp
);
509 eax
.split
.bit_width
= min_t(int, eax
.split
.bit_width
, x86_pmu
.bit_width_gp
);
510 pmu
->counter_bitmask
[KVM_PMC_GP
] = ((u64
)1 << eax
.split
.bit_width
) - 1;
511 eax
.split
.mask_length
= min_t(int, eax
.split
.mask_length
, x86_pmu
.events_mask_len
);
512 pmu
->available_event_types
= ~entry
->ebx
&
513 ((1ull << eax
.split
.mask_length
) - 1);
515 if (pmu
->version
== 1) {
516 pmu
->nr_arch_fixed_counters
= 0;
518 pmu
->nr_arch_fixed_counters
=
519 min3(ARRAY_SIZE(fixed_pmc_events
),
520 (size_t) edx
.split
.num_counters_fixed
,
521 (size_t) x86_pmu
.num_counters_fixed
);
522 edx
.split
.bit_width_fixed
= min_t(int,
523 edx
.split
.bit_width_fixed
, x86_pmu
.bit_width_fixed
);
524 pmu
->counter_bitmask
[KVM_PMC_FIXED
] =
525 ((u64
)1 << edx
.split
.bit_width_fixed
) - 1;
526 setup_fixed_pmc_eventsel(pmu
);
529 pmu
->global_ctrl
= ((1ull << pmu
->nr_arch_gp_counters
) - 1) |
530 (((1ull << pmu
->nr_arch_fixed_counters
) - 1) << INTEL_PMC_IDX_FIXED
);
531 pmu
->global_ctrl_mask
= ~pmu
->global_ctrl
;
532 pmu
->global_ovf_ctrl_mask
= pmu
->global_ctrl_mask
533 & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF
|
534 MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD
);
535 if (vmx_pt_mode_is_host_guest())
536 pmu
->global_ovf_ctrl_mask
&=
537 ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI
;
539 entry
= kvm_find_cpuid_entry(vcpu
, 7, 0);
541 (boot_cpu_has(X86_FEATURE_HLE
) || boot_cpu_has(X86_FEATURE_RTM
)) &&
542 (entry
->ebx
& (X86_FEATURE_HLE
|X86_FEATURE_RTM
))) {
543 pmu
->reserved_bits
^= HSW_IN_TX
;
544 pmu
->raw_event_mask
|= (HSW_IN_TX
|HSW_IN_TX_CHECKPOINTED
);
547 bitmap_set(pmu
->all_valid_pmc_idx
,
548 0, pmu
->nr_arch_gp_counters
);
549 bitmap_set(pmu
->all_valid_pmc_idx
,
550 INTEL_PMC_MAX_GENERIC
, pmu
->nr_arch_fixed_counters
);
552 nested_vmx_pmu_refresh(vcpu
,
553 intel_is_valid_msr(vcpu
, MSR_CORE_PERF_GLOBAL_CTRL
));
555 if (intel_pmu_lbr_is_compatible(vcpu
))
556 x86_perf_get_lbr(&lbr_desc
->records
);
558 lbr_desc
->records
.nr
= 0;
560 if (lbr_desc
->records
.nr
)
561 bitmap_set(pmu
->all_valid_pmc_idx
, INTEL_PMC_IDX_FIXED_VLBR
, 1);
564 static void intel_pmu_init(struct kvm_vcpu
*vcpu
)
567 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
568 struct lbr_desc
*lbr_desc
= vcpu_to_lbr_desc(vcpu
);
570 for (i
= 0; i
< INTEL_PMC_MAX_GENERIC
; i
++) {
571 pmu
->gp_counters
[i
].type
= KVM_PMC_GP
;
572 pmu
->gp_counters
[i
].vcpu
= vcpu
;
573 pmu
->gp_counters
[i
].idx
= i
;
574 pmu
->gp_counters
[i
].current_config
= 0;
577 for (i
= 0; i
< INTEL_PMC_MAX_FIXED
; i
++) {
578 pmu
->fixed_counters
[i
].type
= KVM_PMC_FIXED
;
579 pmu
->fixed_counters
[i
].vcpu
= vcpu
;
580 pmu
->fixed_counters
[i
].idx
= i
+ INTEL_PMC_IDX_FIXED
;
581 pmu
->fixed_counters
[i
].current_config
= 0;
584 vcpu
->arch
.perf_capabilities
= vmx_get_perf_capabilities();
585 lbr_desc
->records
.nr
= 0;
586 lbr_desc
->event
= NULL
;
587 lbr_desc
->msr_passthrough
= false;
590 static void intel_pmu_reset(struct kvm_vcpu
*vcpu
)
592 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
593 struct kvm_pmc
*pmc
= NULL
;
596 for (i
= 0; i
< INTEL_PMC_MAX_GENERIC
; i
++) {
597 pmc
= &pmu
->gp_counters
[i
];
599 pmc_stop_counter(pmc
);
600 pmc
->counter
= pmc
->eventsel
= 0;
603 for (i
= 0; i
< INTEL_PMC_MAX_FIXED
; i
++) {
604 pmc
= &pmu
->fixed_counters
[i
];
606 pmc_stop_counter(pmc
);
610 pmu
->fixed_ctr_ctrl
= pmu
->global_ctrl
= pmu
->global_status
= 0;
612 intel_pmu_release_guest_lbr_event(vcpu
);
616 * Emulate LBR_On_PMI behavior for 1 < pmu.version < 4.
618 * If Freeze_LBR_On_PMI = 1, the LBR is frozen on PMI and
619 * the KVM emulates to clear the LBR bit (bit 0) in IA32_DEBUGCTL.
621 * Guest needs to re-enable LBR to resume branches recording.
623 static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu
*vcpu
)
625 u64 data
= vmcs_read64(GUEST_IA32_DEBUGCTL
);
627 if (data
& DEBUGCTLMSR_FREEZE_LBRS_ON_PMI
) {
628 data
&= ~DEBUGCTLMSR_LBR
;
629 vmcs_write64(GUEST_IA32_DEBUGCTL
, data
);
633 static void intel_pmu_deliver_pmi(struct kvm_vcpu
*vcpu
)
635 u8 version
= vcpu_to_pmu(vcpu
)->version
;
637 if (!intel_pmu_lbr_is_enabled(vcpu
))
640 if (version
> 1 && version
< 4)
641 intel_pmu_legacy_freezing_lbrs_on_pmi(vcpu
);
644 static void vmx_update_intercept_for_lbr_msrs(struct kvm_vcpu
*vcpu
, bool set
)
646 struct x86_pmu_lbr
*lbr
= vcpu_to_lbr_records(vcpu
);
649 for (i
= 0; i
< lbr
->nr
; i
++) {
650 vmx_set_intercept_for_msr(vcpu
, lbr
->from
+ i
, MSR_TYPE_RW
, set
);
651 vmx_set_intercept_for_msr(vcpu
, lbr
->to
+ i
, MSR_TYPE_RW
, set
);
653 vmx_set_intercept_for_msr(vcpu
, lbr
->info
+ i
, MSR_TYPE_RW
, set
);
656 vmx_set_intercept_for_msr(vcpu
, MSR_LBR_SELECT
, MSR_TYPE_RW
, set
);
657 vmx_set_intercept_for_msr(vcpu
, MSR_LBR_TOS
, MSR_TYPE_RW
, set
);
660 static inline void vmx_disable_lbr_msrs_passthrough(struct kvm_vcpu
*vcpu
)
662 struct lbr_desc
*lbr_desc
= vcpu_to_lbr_desc(vcpu
);
664 if (!lbr_desc
->msr_passthrough
)
667 vmx_update_intercept_for_lbr_msrs(vcpu
, true);
668 lbr_desc
->msr_passthrough
= false;
671 static inline void vmx_enable_lbr_msrs_passthrough(struct kvm_vcpu
*vcpu
)
673 struct lbr_desc
*lbr_desc
= vcpu_to_lbr_desc(vcpu
);
675 if (lbr_desc
->msr_passthrough
)
678 vmx_update_intercept_for_lbr_msrs(vcpu
, false);
679 lbr_desc
->msr_passthrough
= true;
683 * Higher priority host perf events (e.g. cpu pinned) could reclaim the
684 * pmu resources (e.g. LBR) that were assigned to the guest. This is
685 * usually done via ipi calls (more details in perf_install_in_context).
687 * Before entering the non-root mode (with irq disabled here), double
688 * confirm that the pmu features enabled to the guest are not reclaimed
689 * by higher priority host events. Otherwise, disallow vcpu's access to
690 * the reclaimed features.
692 void vmx_passthrough_lbr_msrs(struct kvm_vcpu
*vcpu
)
694 struct kvm_pmu
*pmu
= vcpu_to_pmu(vcpu
);
695 struct lbr_desc
*lbr_desc
= vcpu_to_lbr_desc(vcpu
);
697 if (!lbr_desc
->event
) {
698 vmx_disable_lbr_msrs_passthrough(vcpu
);
699 if (vmcs_read64(GUEST_IA32_DEBUGCTL
) & DEBUGCTLMSR_LBR
)
701 if (test_bit(INTEL_PMC_IDX_FIXED_VLBR
, pmu
->pmc_in_use
))
706 if (lbr_desc
->event
->state
< PERF_EVENT_STATE_ACTIVE
) {
707 vmx_disable_lbr_msrs_passthrough(vcpu
);
708 __clear_bit(INTEL_PMC_IDX_FIXED_VLBR
, pmu
->pmc_in_use
);
711 vmx_enable_lbr_msrs_passthrough(vcpu
);
716 pr_warn_ratelimited("kvm: vcpu-%d: fail to passthrough LBR.\n",
720 static void intel_pmu_cleanup(struct kvm_vcpu
*vcpu
)
722 if (!(vmcs_read64(GUEST_IA32_DEBUGCTL
) & DEBUGCTLMSR_LBR
))
723 intel_pmu_release_guest_lbr_event(vcpu
);
726 struct kvm_pmu_ops intel_pmu_ops
= {
727 .pmc_perf_hw_id
= intel_pmc_perf_hw_id
,
728 .pmc_is_enabled
= intel_pmc_is_enabled
,
729 .pmc_idx_to_pmc
= intel_pmc_idx_to_pmc
,
730 .rdpmc_ecx_to_pmc
= intel_rdpmc_ecx_to_pmc
,
731 .msr_idx_to_pmc
= intel_msr_idx_to_pmc
,
732 .is_valid_rdpmc_ecx
= intel_is_valid_rdpmc_ecx
,
733 .is_valid_msr
= intel_is_valid_msr
,
734 .get_msr
= intel_pmu_get_msr
,
735 .set_msr
= intel_pmu_set_msr
,
736 .refresh
= intel_pmu_refresh
,
737 .init
= intel_pmu_init
,
738 .reset
= intel_pmu_reset
,
739 .deliver_pmi
= intel_pmu_deliver_pmi
,
740 .cleanup
= intel_pmu_cleanup
,