// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM PMU support for Intel CPUs
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors: Avi Kivity <avi@redhat.com>
 *          Gleb Natapov <gleb@redhat.com>
 */
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "nested.h"
#include "pmu.h"
#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)

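/*
 * Map each architectural event's (event_select, unit_mask) pair to the
 * generic perf hardware event id that is used when programming a host
 * perf event on the guest's behalf.
 */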
static struct kvm_event_hw_type_mapping intel_arch_events[] = {
        [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
        [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
        [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
        [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
        [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
        [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
        [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
        /* The above index must match CPUID 0x0A.EBX bit vector */
        [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};

/* mapping between fixed pmc index and intel_arch_events array */
static int fixed_pmc_events[] = {1, 0, 7};

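/*
 * Reprogram only the fixed counters whose control field in the guest's
 * IA32_FIXED_CTR_CTRL value actually changed; untouched counters keep
 * their existing perf events.
 */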
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
        struct kvm_pmc *pmc;
        int i;

        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                u8 new_ctrl = fixed_ctrl_field(data, i);
                u8 old_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, i);

                pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);

                if (old_ctrl == new_ctrl)
                        continue;

                __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
                reprogram_fixed_counter(pmc, new_ctrl, i);
        }

        pmu->fixed_ctr_ctrl = data;
}

/* Called when the global control register has been updated. */
static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
{
        int bit;
        u64 diff = pmu->global_ctrl ^ data;

        pmu->global_ctrl = data;

        for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
                reprogram_counter(pmu, bit);
}

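/*
 * Translate a guest eventsel into a generic perf hardware event id via
 * the intel_arch_events table.  PERF_COUNT_HW_MAX signals "no
 * architectural match" (the event is programmed raw), while
 * PERF_COUNT_HW_MAX + 1 flags an architectural event that the guest's
 * CPUID reports as not present so that it is not programmed at all.
 */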
static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
        u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
        u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
        int i;

        for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
                if (intel_arch_events[i].eventsel != event_select ||
                    intel_arch_events[i].unit_mask != unit_mask)
                        continue;

                /* disable an event that is reported as not present by cpuid */
                if ((i < 7) && !(pmu->available_event_types & (1 << i)))
                        return PERF_COUNT_HW_MAX + 1;

                break;
        }

        if (i == ARRAY_SIZE(intel_arch_events))
                return PERF_COUNT_HW_MAX;

        return intel_arch_events[i].event_type;
}

/* Check if a PMC is enabled by comparing it with the global_ctrl bits. */
static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);

        return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
}

static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
{
        if (pmc_idx < INTEL_PMC_IDX_FIXED)
                return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
                                  MSR_P6_EVNTSEL0);
        else {
                u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;

                return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
        }
}

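/*
 * For RDPMC, bit 30 of ECX selects the counter type (set for fixed,
 * clear for general purpose); the low bits select the counter index.
 */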
static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        bool fixed = idx & (1u << 30);

        idx &= ~(3u << 30);

        return fixed ? idx < pmu->nr_arch_fixed_counters
                     : idx < pmu->nr_arch_gp_counters;
}

static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
                                              unsigned int idx, u64 *mask)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        bool fixed = idx & (1u << 30);
        struct kvm_pmc *counters;
        unsigned int num_counters;

        idx &= ~(3u << 30);
        if (fixed) {
                counters = pmu->fixed_counters;
                num_counters = pmu->nr_arch_fixed_counters;
        } else {
                counters = pmu->gp_counters;
                num_counters = pmu->nr_arch_gp_counters;
        }
        if (idx >= num_counters)
                return NULL;
        *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
        return &counters[array_index_nospec(idx, num_counters)];
}

static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
{
        if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
                return 0;

        return vcpu->arch.perf_capabilities;
}

static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
{
        return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0;
}

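/*
 * Full-width counter MSRs (MSR_IA32_PMC0 and up) alias the legacy
 * counters (MSR_IA32_PERFCTR0 and up) and are only valid when the guest
 * has been granted PMU_CAP_FW_WRITES in IA32_PERF_CAPABILITIES.
 */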
static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
{
        if (!fw_writes_is_enabled(pmu_to_vcpu(pmu)))
                return NULL;

        return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
}

bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu)
{
        /*
         * As a first step, a guest can only enable the LBR feature if its
         * cpu model is the same as the host's, because the LBR registers
         * are passed through to the guest and they're model specific.
         */
        return boot_cpu_data.x86_model == guest_cpuid_model(vcpu);
}

bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
{
        struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);

        return lbr->nr && (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_LBR_FMT);
}

static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
{
        struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
        bool ret = false;

        if (!intel_pmu_lbr_is_enabled(vcpu))
                return ret;

        ret = (index == MSR_LBR_SELECT) || (index == MSR_LBR_TOS) ||
                (index >= records->from && index < records->from + records->nr) ||
                (index >= records->to && index < records->to + records->nr);

        if (!ret && records->info)
                ret = (index >= records->info && index < records->info + records->nr);

        return ret;
}

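/*
 * A PMU MSR is valid either as one of the global control MSRs (only
 * exposed for PMU version > 1) or if it resolves to a GP, fixed,
 * full-width or LBR register of this vcpu's PMU.
 */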
static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        int ret;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                ret = pmu->version > 1;
                break;
        default:
                ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
                        get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
                        get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr) ||
                        intel_pmu_is_valid_lbr_msr(vcpu, msr);
                break;
        }

        return ret;
}

static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;

        pmc = get_fixed_pmc(pmu, msr);
        pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
        pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);

        return pmc;
}

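/*
 * Drop the vLBR perf event, if any, keeping pmu->event_count in sync so
 * that the lazy PMU cleanup logic sees the correct number of events.
 */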
static inline void intel_pmu_release_guest_lbr_event(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (lbr_desc->event) {
                perf_event_release_kernel(lbr_desc->event);
                lbr_desc->event = NULL;
                vcpu_to_pmu(vcpu)->event_count--;
        }
}

int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct perf_event *event;

        /*
         * The perf_event_attr is constructed in a minimal yet efficient way:
         * - set 'pinned = true' to make it task pinned so that if another
         *   cpu pinned event reclaims LBR, the event->oncpu will be set to -1;
         * - set '.exclude_host = true' to record guest branch behavior;
         *
         * - set '.config = INTEL_FIXED_VLBR_EVENT' to tell host perf to
         *   schedule the event without a real HW counter but a fake one;
         *   check is_guest_lbr_event() and __intel_get_event_constraints();
         *
         * - set 'sample_type = PERF_SAMPLE_BRANCH_STACK' and
         *   'branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
         *   PERF_SAMPLE_BRANCH_USER' to configure it as an LBR callstack
         *   event, which helps KVM to save/restore guest LBR records
         *   during host context switches and reduces overhead quite a lot;
         *   check branch_user_callstack() and intel_pmu_lbr_sched_task();
         */
        struct perf_event_attr attr = {
                .type = PERF_TYPE_RAW,
                .size = sizeof(attr),
                .config = INTEL_FIXED_VLBR_EVENT,
                .sample_type = PERF_SAMPLE_BRANCH_STACK,
                .pinned = true,
                .exclude_host = true,
                .branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
                                        PERF_SAMPLE_BRANCH_USER,
        };

        if (unlikely(lbr_desc->event)) {
                __set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
                return 0;
        }

        event = perf_event_create_kernel_counter(&attr, -1,
                                                current, NULL, NULL);
        if (IS_ERR(event)) {
                pr_debug_ratelimited("%s: failed %ld\n",
                                        __func__, PTR_ERR(event));
                return PTR_ERR(event);
        }
        lbr_desc->event = event;
        pmu->event_count++;
        __set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
        return 0;
}

/*
 * It's safe to access LBR msrs from the guest when they have not
 * been passed through, since the host would help restore or reset
 * the LBR msrs records when the guest LBR event is scheduled in.
 */
static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
                                             struct msr_data *msr_info, bool read)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
        u32 index = msr_info->index;

        if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
                return false;

        if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu) < 0)
                goto dummy;

        /*
         * Disable irqs to ensure the LBR feature doesn't get reclaimed by the
         * host at the time the value is read from the msr, and this avoids the
         * host LBR value being leaked to the guest. If LBR has been reclaimed,
         * return 0 on guest reads.
         */
        local_irq_disable();
        if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) {
                if (read)
                        rdmsrl(index, msr_info->data);
                else
                        wrmsrl(index, msr_info->data);
                __set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
                local_irq_enable();
                return true;
        }
        clear_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
        local_irq_enable();

dummy:
        if (read)
                msr_info->data = 0;
        return true;
}

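/*
 * Returns 0 if the MSR access was handled, non-zero to let the common
 * MSR code treat it as an unsupported access.
 */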
static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        u32 msr = msr_info->index;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                msr_info->data = pmu->fixed_ctr_ctrl;
                return 0;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                msr_info->data = pmu->global_status;
                return 0;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                msr_info->data = pmu->global_ctrl;
                return 0;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                msr_info->data = 0;
                return 0;
        default:
                if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
                    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
                        u64 val = pmc_read_counter(pmc);
                        msr_info->data =
                                val & pmu->counter_bitmask[KVM_PMC_GP];
                        return 0;
                } else if ((pmc = get_fixed_pmc(pmu, msr))) {
                        u64 val = pmc_read_counter(pmc);
                        msr_info->data =
                                val & pmu->counter_bitmask[KVM_PMC_FIXED];
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
                        msr_info->data = pmc->eventsel;
                        return 0;
                } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
                        return 0;
        }

        return 1;
}

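/*
 * Writes mirror the read side: global control MSRs are validated against
 * their reserved-bit masks, counter writes are sign-extended from 32 bits
 * unless the full-width alias is used, and anything else is tried as an
 * eventsel or LBR MSR access.
 */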
static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        u32 msr = msr_info->index;
        u64 data = msr_info->data;

        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                if (pmu->fixed_ctr_ctrl == data)
                        return 0;
                if (!(data & 0xfffffffffffff444ull)) {
                        reprogram_fixed_counters(pmu, data);
                        return 0;
                }
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                if (msr_info->host_initiated) {
                        pmu->global_status = data;
                        return 0;
                }
                break; /* RO MSR */
        case MSR_CORE_PERF_GLOBAL_CTRL:
                if (pmu->global_ctrl == data)
                        return 0;
                if (kvm_valid_perf_global_ctrl(pmu, data)) {
                        global_ctrl_changed(pmu, data);
                        return 0;
                }
                break;
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                if (!(data & pmu->global_ovf_ctrl_mask)) {
                        if (!msr_info->host_initiated)
                                pmu->global_status &= ~data;
                        return 0;
                }
                break;
        default:
                if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
                    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
                        if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
                            (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
                                return 1;
                        if (!msr_info->host_initiated &&
                            !(msr & MSR_PMC_FULL_WIDTH_BIT))
                                data = (s64)(s32)data;
                        pmc->counter += data - pmc_read_counter(pmc);
                        if (pmc->perf_event && !pmc->is_paused)
                                perf_event_period(pmc->perf_event,
                                                  get_sample_period(pmc, data));
                        return 0;
                } else if ((pmc = get_fixed_pmc(pmu, msr))) {
                        pmc->counter += data - pmc_read_counter(pmc);
                        if (pmc->perf_event && !pmc->is_paused)
                                perf_event_period(pmc->perf_event,
                                                  get_sample_period(pmc, data));
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
                        if (data == pmc->eventsel)
                                return 0;
                        if (!(data & pmu->reserved_bits)) {
                                reprogram_gp_counter(pmc, data);
                                return 0;
                        }
                } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
                        return 0;
        }

        return 1;
}

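/*
 * Seed each fixed counter's eventsel with the architectural event it
 * hardwires (instructions retired, core cycles, ref cycles) so that the
 * common code can treat fixed and GP counters uniformly.
 */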
static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
{
        size_t size = ARRAY_SIZE(fixed_pmc_events);
        struct kvm_pmc *pmc;
        u32 event;
        int i;

        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                pmc = &pmu->fixed_counters[i];
                event = fixed_pmc_events[array_index_nospec(i, size)];
                pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
                        intel_arch_events[event].eventsel;
        }
}

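/*
 * Re-derive the vPMU configuration from the guest's CPUID leaves 0xA and
 * 0x7, clamped by what the host PMU actually supports; typically invoked
 * after userspace updates the vcpu's CPUID.
 */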
static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        struct x86_pmu_capability x86_pmu;
        struct kvm_cpuid_entry2 *entry;
        union cpuid10_eax eax;
        union cpuid10_edx edx;

        pmu->nr_arch_gp_counters = 0;
        pmu->nr_arch_fixed_counters = 0;
        pmu->counter_bitmask[KVM_PMC_GP] = 0;
        pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
        pmu->version = 0;
        pmu->reserved_bits = 0xffffffff00200000ull;

        entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
        if (!entry || !vcpu->kvm->arch.enable_pmu)
                return;
        eax.full = entry->eax;
        edx.full = entry->edx;

        pmu->version = eax.split.version_id;
        if (!pmu->version)
                return;

        perf_get_x86_pmu_capability(&x86_pmu);

        pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
                                         x86_pmu.num_counters_gp);
        eax.split.bit_width = min_t(int, eax.split.bit_width, x86_pmu.bit_width_gp);
        pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
        eax.split.mask_length = min_t(int, eax.split.mask_length, x86_pmu.events_mask_len);
        pmu->available_event_types = ~entry->ebx &
                                        ((1ull << eax.split.mask_length) - 1);

        if (pmu->version == 1) {
                pmu->nr_arch_fixed_counters = 0;
        } else {
                pmu->nr_arch_fixed_counters =
                        min3(ARRAY_SIZE(fixed_pmc_events),
                             (size_t) edx.split.num_counters_fixed,
                             (size_t) x86_pmu.num_counters_fixed);
                edx.split.bit_width_fixed = min_t(int,
                        edx.split.bit_width_fixed, x86_pmu.bit_width_fixed);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
                        ((u64)1 << edx.split.bit_width_fixed) - 1;
                setup_fixed_pmc_eventsel(pmu);
        }

        pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
                (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
        pmu->global_ctrl_mask = ~pmu->global_ctrl;
        pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask
                        & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
                            MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
        if (vmx_pt_mode_is_host_guest())
                pmu->global_ovf_ctrl_mask &=
                                ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;

        entry = kvm_find_cpuid_entry(vcpu, 7, 0);
        if (entry &&
            (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
            (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
                pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;

        bitmap_set(pmu->all_valid_pmc_idx,
                0, pmu->nr_arch_gp_counters);
        bitmap_set(pmu->all_valid_pmc_idx,
                INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);

        nested_vmx_pmu_refresh(vcpu,
                               intel_is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL));

        if (intel_pmu_lbr_is_compatible(vcpu))
                x86_perf_get_lbr(&lbr_desc->records);
        else
                lbr_desc->records.nr = 0;

        if (lbr_desc->records.nr)
                bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
}

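/*
 * One-time initialization of the counter arrays; the actual sizing and
 * feature setup happens later in intel_pmu_refresh() once the guest's
 * CPUID is known.
 */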
static void intel_pmu_init(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
                pmu->gp_counters[i].type = KVM_PMC_GP;
                pmu->gp_counters[i].vcpu = vcpu;
                pmu->gp_counters[i].idx = i;
                pmu->gp_counters[i].current_config = 0;
        }

        for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
                pmu->fixed_counters[i].type = KVM_PMC_FIXED;
                pmu->fixed_counters[i].vcpu = vcpu;
                pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
                pmu->fixed_counters[i].current_config = 0;
        }

        vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();
        lbr_desc->records.nr = 0;
        lbr_desc->event = NULL;
        lbr_desc->msr_passthrough = false;
}

static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc = NULL;
        int i;

        for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
                pmc = &pmu->gp_counters[i];

                pmc_stop_counter(pmc);
                pmc->counter = pmc->eventsel = 0;
        }

        for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
                pmc = &pmu->fixed_counters[i];

                pmc_stop_counter(pmc);
                pmc->counter = 0;
        }

        pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;

        intel_pmu_release_guest_lbr_event(vcpu);
}

/*
 * Emulate LBR_On_PMI behavior for 1 < pmu.version < 4.
 *
 * If Freeze_LBR_On_PMI = 1, the LBR is frozen on PMI, and
 * KVM emulates this by clearing the LBR bit (bit 0) in IA32_DEBUGCTL.
 *
 * The guest needs to re-enable LBR to resume branch recording.
 */
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
        u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);

        if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
                data &= ~DEBUGCTLMSR_LBR;
                vmcs_write64(GUEST_IA32_DEBUGCTL, data);
        }
}

static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
        u8 version = vcpu_to_pmu(vcpu)->version;

        if (!intel_pmu_lbr_is_enabled(vcpu))
                return;

        if (version > 1 && version < 4)
                intel_pmu_legacy_freezing_lbrs_on_pmi(vcpu);
}

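/*
 * Toggle interception for the whole LBR register set at once: the
 * from/to/info stacks plus MSR_LBR_SELECT and MSR_LBR_TOS.
 */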
static void vmx_update_intercept_for_lbr_msrs(struct kvm_vcpu *vcpu, bool set)
{
        struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
        int i;

        for (i = 0; i < lbr->nr; i++) {
                vmx_set_intercept_for_msr(vcpu, lbr->from + i, MSR_TYPE_RW, set);
                vmx_set_intercept_for_msr(vcpu, lbr->to + i, MSR_TYPE_RW, set);
                if (lbr->info)
                        vmx_set_intercept_for_msr(vcpu, lbr->info + i, MSR_TYPE_RW, set);
        }

        vmx_set_intercept_for_msr(vcpu, MSR_LBR_SELECT, MSR_TYPE_RW, set);
        vmx_set_intercept_for_msr(vcpu, MSR_LBR_TOS, MSR_TYPE_RW, set);
}

static inline void vmx_disable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (!lbr_desc->msr_passthrough)
                return;

        vmx_update_intercept_for_lbr_msrs(vcpu, true);
        lbr_desc->msr_passthrough = false;
}

static inline void vmx_enable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (lbr_desc->msr_passthrough)
                return;

        vmx_update_intercept_for_lbr_msrs(vcpu, false);
        lbr_desc->msr_passthrough = true;
}

/*
 * Higher priority host perf events (e.g. cpu pinned) could reclaim the
 * pmu resources (e.g. LBR) that were assigned to the guest. This is
 * usually done via ipi calls (more details in perf_install_in_context).
 *
 * Before entering the non-root mode (with irqs disabled here), double
 * confirm that the pmu features enabled for the guest have not been
 * reclaimed by higher priority host events. Otherwise, disallow the
 * vcpu's access to the reclaimed features.
 */
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

        if (!lbr_desc->event) {
                vmx_disable_lbr_msrs_passthrough(vcpu);
                if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
                        goto warn;
                if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
                        goto warn;
                return;
        }

        if (lbr_desc->event->state < PERF_EVENT_STATE_ACTIVE) {
                vmx_disable_lbr_msrs_passthrough(vcpu);
                __clear_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
                goto warn;
        } else
                vmx_enable_lbr_msrs_passthrough(vcpu);

        return;

warn:
        pr_warn_ratelimited("kvm: vcpu-%d: fail to passthrough LBR.\n",
                vcpu->vcpu_id);
}

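/*
 * Release the vLBR event at cleanup time only if the guest is no longer
 * using LBR (DEBUGCTLMSR_LBR clear); an event still in use must survive.
 */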
static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
{
        if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
                intel_pmu_release_guest_lbr_event(vcpu);
}

struct kvm_pmu_ops intel_pmu_ops = {
        .pmc_perf_hw_id = intel_pmc_perf_hw_id,
        .pmc_is_enabled = intel_pmc_is_enabled,
        .pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
        .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
        .msr_idx_to_pmc = intel_msr_idx_to_pmc,
        .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
        .is_valid_msr = intel_is_valid_msr,
        .get_msr = intel_pmu_get_msr,
        .set_msr = intel_pmu_set_msr,
        .refresh = intel_pmu_refresh,
        .init = intel_pmu_init,
        .reset = intel_pmu_reset,
        .deliver_pmi = intel_pmu_deliver_pmi,
        .cleanup = intel_pmu_cleanup,
};