Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 23 Jun 2023 00:54:10 +0000 (17:54 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 23 Jun 2023 00:54:10 +0000 (17:54 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 23 Jun 2023 00:54:10 +0000 (17:54 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 23 Jun 2023 00:54:10 +0000 (17:54 -0700)
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h

index f4db3e75d75f024ef41943729e4558d6eb3bf604..f3cd04ff022df106f17ce676ede60d647cb3cf20 100644 (file)
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -222,6 +222,11 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
         return false;
  }
  
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+       return false;
+}
+
  /* PMU Version in DFR Register */
  #define ARMV8_PMU_DFR_VER_NI        0
  #define ARMV8_PMU_DFR_VER_V3P4      0x5
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h

index 7e7e19ef6993ede45aea71c6f19b624731290fbe..9787503ff43fdfb01703302f7c0e63cb02db901d 100644 (file)
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -699,6 +699,8 @@ struct kvm_vcpu_arch {
  #define SYSREGS_ON_CPU         __vcpu_single_flag(sflags, BIT(4))
  /* Software step state is Active-pending */
  #define DBG_SS_ACTIVE_PENDING  __vcpu_single_flag(sflags, BIT(5))
+/* PMUSERENR for the guest EL0 is on physical CPU */
+#define PMUSERENR_ON_CPU       __vcpu_single_flag(sflags, BIT(6))
  
  
  /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -1065,9 +1067,14 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
  #ifdef CONFIG_KVM
  void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
  void kvm_clr_pmu_events(u32 clr);
+bool kvm_set_pmuserenr(u64 val);
  #else
  static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
  static inline void kvm_clr_pmu_events(u32 clr) {}
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+       return false;
+}
  #endif
  
  void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h

index 5c15c58f90ccebb1afe2c364a5fec23559dfad03..4fe217efa2185a73bea804b1842deff3684918a7 100644 (file)
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -82,8 +82,14 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
          * EL1 instead of being trapped to EL2.
          */
         if (kvm_arm_support_pmu_v3()) {
+               struct kvm_cpu_context *hctxt;
+
                 write_sysreg(0, pmselr_el0);
+
+               hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+               ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
                 write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+               vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
         }
  
         vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
@@ -106,8 +112,13 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
         write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
  
         write_sysreg(0, hstr_el2);
-       if (kvm_arm_support_pmu_v3())
-               write_sysreg(0, pmuserenr_el0);
+       if (kvm_arm_support_pmu_v3()) {
+               struct kvm_cpu_context *hctxt;
+
+               hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+               write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
+               vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
+       }
  
         if (cpus_have_final_cap(ARM64_SME)) {
                 sysreg_clear_set_s(SYS_HFGRTR_EL2, 0,
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c

index 7a1aa511e7da6f68fc88eb4a6b39dcb7927ded09..b37e7c96efea188cf8ed0ed1fb86744948d0065c 100644 (file)
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -92,14 +92,28 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
  }
  NOKPROBE_SYMBOL(__deactivate_traps);
  
+/*
+ * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to
+ * prevent a race condition between context switching of PMUSERENR_EL0
+ * in __{activate,deactivate}_traps_common() and IPIs that attempt to
+ * update PMUSERENR_EL0. See also kvm_set_pmuserenr().
+ */
  void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
  {
+       unsigned long flags;
+
+       local_irq_save(flags);
         __activate_traps_common(vcpu);
+       local_irq_restore(flags);
  }
  
  void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
  {
+       unsigned long flags;
+
+       local_irq_save(flags);
         __deactivate_traps_common(vcpu);
+       local_irq_restore(flags);
  }
  
  static const exit_handler_fn hyp_exit_handlers[] = {
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c

index 491ca7eb2a4c6e186abbbae9112942e0420f0116..5606509724787383ef90fb87e30ec4008303e6f9 100644 (file)
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -700,7 +700,25 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void)
  
         mutex_lock(&arm_pmus_lock);
  
-       cpu = smp_processor_id();
+       /*
+        * It is safe to use a stale cpu to iterate the list of PMUs so long as
+        * the same value is used for the entirety of the loop. Given this, and
+        * the fact that no percpu data is used for the lookup there is no need
+        * to disable preemption.
+        *
+        * It is still necessary to get a valid cpu, though, to probe for the
+        * default PMU instance as userspace is not required to specify a PMU
+        * type. In order to uphold the preexisting behavior KVM selects the
+        * PMU instance for the core where the first call to the
+        * KVM_ARM_VCPU_PMU_V3_CTRL attribute group occurs. A dependent use case
+        * would be a user with disdain of all things big.LITTLE that affines
+        * the VMM to a particular cluster of cores.
+        *
+        * In any case, userspace should just do the sane thing and use the UAPI
+        * to select a PMU type directly. But, be wary of the baggage being
+        * carried here.
+        */
+       cpu = raw_smp_processor_id();
         list_for_each_entry(entry, &arm_pmus, entry) {
                 tmp = entry->arm_pmu;
  
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c

index 7887133d15f0b67f8b04c2c01b8c17ed2d9cb5d8..121f1a14c829c2804127427d4331d91d84d1ab13 100644 (file)
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -209,3 +209,30 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
         kvm_vcpu_pmu_enable_el0(events_host);
         kvm_vcpu_pmu_disable_el0(events_guest);
  }
+
+/*
+ * With VHE, keep track of the PMUSERENR_EL0 value for the host EL0 on the pCPU
+ * where PMUSERENR_EL0 for the guest is loaded, since PMUSERENR_EL0 is switched
+ * to the value for the guest on vcpu_load().  The value for the host EL0
+ * will be restored on vcpu_put(), before returning to userspace.
+ * This isn't necessary for nVHE, as the register is context switched for
+ * every guest enter/exit.
+ *
+ * Return true if KVM takes care of the register. Otherwise return false.
+ */
+bool kvm_set_pmuserenr(u64 val)
+{
+       struct kvm_cpu_context *hctxt;
+       struct kvm_vcpu *vcpu;
+
+       if (!kvm_arm_support_pmu_v3() || !has_vhe())
+               return false;
+
+       vcpu = kvm_get_running_vcpu();
+       if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU))
+               return false;
+
+       hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+       ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
+       return true;
+}
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c

index 6eafc2c45cfcf73e1bb5d817545ec4f290a388e9..c8c3cb812783218e93065d157291d09c7a4a0812 100644 (file)
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -446,6 +446,7 @@ int vgic_lazy_init(struct kvm *kvm)
  int kvm_vgic_map_resources(struct kvm *kvm)
  {
         struct vgic_dist *dist = &kvm->arch.vgic;
+       enum vgic_type type;
         gpa_t dist_base;
         int ret = 0;
  
@@ -460,10 +461,13 @@ int kvm_vgic_map_resources(struct kvm *kvm)
         if (!irqchip_in_kernel(kvm))
                 goto out;
  
-       if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+       if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
                 ret = vgic_v2_map_resources(kvm);
-       else
+               type = VGIC_V2;
+       } else {
                 ret = vgic_v3_map_resources(kvm);
+               type = VGIC_V3;
+       }
  
         if (ret) {
                 __kvm_vgic_destroy(kvm);
@@ -473,8 +477,7 @@ int kvm_vgic_map_resources(struct kvm *kvm)
         dist_base = dist->vgic_dist_base;
         mutex_unlock(&kvm->arch.config_lock);
  
-       ret = vgic_register_dist_iodev(kvm, dist_base,
-                                      kvm_vgic_global_state.type);
+       ret = vgic_register_dist_iodev(kvm, dist_base, type);
         if (ret) {
                 kvm_err("Unable to register VGIC dist MMIO regions\n");
                 kvm_vgic_destroy(kvm);
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c

index c98e4039386dbcbdb1de1f3c8848dbf038b0ceb9..93b7edb5f1e7c15fadc945510b3674dfc43374b0 100644 (file)
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -677,9 +677,25 @@ static inline u32 armv8pmu_getreset_flags(void)
         return value;
  }
  
+static void update_pmuserenr(u64 val)
+{
+       lockdep_assert_irqs_disabled();
+
+       /*
+        * The current PMUSERENR_EL0 value might be the value for the guest.
+        * If that's the case, have KVM keep track of the register value
+        * for the host EL0 so that KVM can restore it before returning to
+        * the host EL0. Otherwise, update the register now.
+        */
+       if (kvm_set_pmuserenr(val))
+               return;
+
+       write_pmuserenr(val);
+}
+
  static void armv8pmu_disable_user_access(void)
  {
-       write_pmuserenr(0);
+       update_pmuserenr(0);
  }
  
  static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
@@ -695,8 +711,7 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
                         armv8pmu_write_evcntr(i, 0);
         }
  
-       write_pmuserenr(0);
-       write_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
+       update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
  }
  
  static void armv8pmu_enable_event(struct perf_event *event)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index 479802a892d4f7aace4d72a38e8fab18e5e7dc03..65f94f592ff88380d0b6af736600eab0f5a38cbb 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -686,6 +686,24 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
  
         return __kvm_handle_hva_range(kvm, &range);
  }
+
+static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+       /*
+        * Skipping invalid memslots is correct if and only if change_pte() is
+        * surrounded by invalidate_range_{start,end}(), which is currently
+        * guaranteed by the primary MMU.  If that ever changes, KVM needs to
+        * unmap the memslot instead of skipping the memslot to ensure that KVM
+        * doesn't hold references to the old PFN.
+        */
+       WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
+
+       if (range->slot->flags & KVM_MEMSLOT_INVALID)
+               return false;
+
+       return kvm_set_spte_gfn(kvm, range);
+}
+
  static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                         struct mm_struct *mm,
                                         unsigned long address,
@@ -707,7 +725,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
         if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
                 return;
  
-       kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
+       kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn);
  }
  
  void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 23 Jun 2023 00:54:10 +0000 (17:54 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 23 Jun 2023 00:54:10 +0000 (17:54 -0700)
arch/arm/include/asm/arm_pmuv3.h		patch \| blob \| blame \| history
arch/arm64/include/asm/kvm_host.h		patch \| blob \| blame \| history
arch/arm64/kvm/hyp/include/hyp/switch.h		patch \| blob \| blame \| history
arch/arm64/kvm/hyp/vhe/switch.c		patch \| blob \| blame \| history
arch/arm64/kvm/pmu-emul.c		patch \| blob \| blame \| history
arch/arm64/kvm/pmu.c		patch \| blob \| blame \| history
arch/arm64/kvm/vgic/vgic-init.c		patch \| blob \| blame \| history
drivers/perf/arm_pmuv3.c		patch \| blob \| blame \| history
virt/kvm/kvm_main.c		patch \| blob \| blame \| history