]> git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
KVM: VMX: Extract posted interrupt support to separate files
author Xiaoyao Li <xiaoyao.li@intel.com>
Wed, 23 Sep 2020 18:31:11 +0000 (11:31 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Mon, 28 Sep 2020 11:57:38 +0000 (07:57 -0400)
Extract the posted interrupt code so that it can be reused for Trust
Domain Extensions (TDX), which requires posted interrupts and can use
KVM VMX's implementation almost verbatim.  TDX is different enough from
raw VMX that it is highly desirable to implement the guts of TDX in a
separate file, i.e. reusing posted interrupt code by shoving TDX support
into vmx.c would be a mess.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20200923183112.3030-2-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/Makefile
arch/x86/kvm/vmx/posted_intr.c [new file with mode: 0644]
arch/x86/kvm/vmx/posted_intr.h [new file with mode: 0644]
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h

index 4a3081e9f4b5de97c4b90c1858284731088778ce..7f86a14aed0e924de740f3d2ccd7f67306daa0eb 100644 (file)
@@ -17,7 +17,8 @@ kvm-y                 += x86.o emulate.o i8259.o irq.o lapic.o \
                           i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
                           hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o
 
-kvm-intel-y            += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
+kvm-intel-y            += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
+                          vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
 kvm-amd-y              += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
 
 obj-$(CONFIG_KVM)      += kvm.o
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
new file mode 100644 (file)
index 0000000..e4e7adf
--- /dev/null
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm_host.h>
+
+#include <asm/irq_remapping.h>
+#include <asm/cpu.h>
+
+#include "lapic.h"
+#include "posted_intr.h"
+#include "trace.h"
+#include "vmx.h"
+
+/*
+ * We maintain a per-CPU linked list of vCPUs, so in pi_wakeup_handler() we
+ * can find which vCPU should be woken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
+static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+       return &(to_vmx(vcpu)->pi_desc);
+}
+
+void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
+{
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+       struct pi_desc old, new;
+       unsigned int dest;
+
+       /*
+        * In case of hot-plug or hot-unplug, we may have to undo
+        * vmx_vcpu_pi_put even if there is no assigned device.  And we
+        * always keep PI.NDST up to date for simplicity: it makes the
+        * code easier, and CPU migration is not a fast path.
+        */
+       if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
+               return;
+
+       /*
+        * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+        * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
+        * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
+        * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
+        * correctly.
+        */
+       if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
+               pi_clear_sn(pi_desc);
+               goto after_clear_sn;
+       }
+
+       /* The full case.  */
+       do {
+               old.control = new.control = pi_desc->control;
+
+               dest = cpu_physical_id(cpu);
+
+               if (x2apic_enabled())
+                       new.ndst = dest;
+               else
+                       new.ndst = (dest << 8) & 0xFF00;
+
+               new.sn = 0;
+       } while (cmpxchg64(&pi_desc->control, old.control,
+                          new.control) != old.control);
+
+after_clear_sn:
+
+       /*
+        * Clear SN before reading the bitmap.  The VT-d firmware
+        * writes the bitmap and reads SN atomically (5.2.3 in the
+        * spec), so it doesn't really have a memory barrier that
+        * pairs with this, but we cannot do that and we need one.
+        */
+       smp_mb__after_atomic();
+
+       if (!pi_is_pir_empty(pi_desc))
+               pi_set_on(pi_desc);
+}
+
+void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
+{
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+               !kvm_vcpu_apicv_active(vcpu))
+               return;
+
+       /* Set SN when the vCPU is preempted */
+       if (vcpu->preempted)
+               pi_set_sn(pi_desc);
+}
+
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+       struct pi_desc old, new;
+       unsigned int dest;
+
+       do {
+               old.control = new.control = pi_desc->control;
+               WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+                    "Wakeup handler not enabled while the VCPU is blocked\n");
+
+               dest = cpu_physical_id(vcpu->cpu);
+
+               if (x2apic_enabled())
+                       new.ndst = dest;
+               else
+                       new.ndst = (dest << 8) & 0xFF00;
+
+               /* set 'NV' to 'notification vector' */
+               new.nv = POSTED_INTR_VECTOR;
+       } while (cmpxchg64(&pi_desc->control, old.control,
+                          new.control) != old.control);
+
+       if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               list_del(&vcpu->blocked_vcpu_list);
+               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               vcpu->pre_pcpu = -1;
+       }
+}
+
+/*
+ * This routine does the following for a vCPU that is about to
+ * block, if VT-d PI is enabled:
+ * - Add the vCPU to the per-CPU wakeup list, so that when interrupts
+ *   arrive we can find the right vCPU to wake up.
+ * - Change the Posted-interrupt descriptor as below:
+ *      'NDST' <-- vcpu->pre_pcpu
+ *      'NV' <-- POSTED_INTR_WAKEUP_VECTOR
+ * - If 'ON' is set during this process, at least one interrupt has
+ *   been posted for this vCPU and it must not block; in that case
+ *   return 1, otherwise return 0.
+ *
+ */
+int pi_pre_block(struct kvm_vcpu *vcpu)
+{
+       unsigned int dest;
+       struct pi_desc old, new;
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+               !kvm_vcpu_apicv_active(vcpu))
+               return 0;
+
+       WARN_ON(irqs_disabled());
+       local_irq_disable();
+       if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+               vcpu->pre_pcpu = vcpu->cpu;
+               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               list_add_tail(&vcpu->blocked_vcpu_list,
+                             &per_cpu(blocked_vcpu_on_cpu,
+                                      vcpu->pre_pcpu));
+               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+       }
+
+       do {
+               old.control = new.control = pi_desc->control;
+
+               WARN((pi_desc->sn == 1),
+                    "Warning: SN field of posted-interrupts "
+                    "is set before blocking\n");
+
+               /*
+                * Since the vCPU can be preempted during this process,
+                * vcpu->cpu may differ from pre_pcpu, so we need to
+                * use pre_pcpu as the destination of the wakeup
+                * notification event; then the wakeup handler can
+                * find the right vCPU to wake up if interrupts arrive
+                * while the vCPU is in the blocked state.
+                */
+               dest = cpu_physical_id(vcpu->pre_pcpu);
+
+               if (x2apic_enabled())
+                       new.ndst = dest;
+               else
+                       new.ndst = (dest << 8) & 0xFF00;
+
+               /* set 'NV' to 'wakeup vector' */
+               new.nv = POSTED_INTR_WAKEUP_VECTOR;
+       } while (cmpxchg64(&pi_desc->control, old.control,
+                          new.control) != old.control);
+
+       /* We should not block the vCPU if an interrupt is posted for it.  */
+       if (pi_test_on(pi_desc) == 1)
+               __pi_post_block(vcpu);
+
+       local_irq_enable();
+       return (vcpu->pre_pcpu == -1);
+}
+
+void pi_post_block(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->pre_pcpu == -1)
+               return;
+
+       WARN_ON(irqs_disabled());
+       local_irq_disable();
+       __pi_post_block(vcpu);
+       local_irq_enable();
+}
+
+/*
+ * Handler for POSTED_INTR_WAKEUP_VECTOR.
+ */
+void pi_wakeup_handler(void)
+{
+       struct kvm_vcpu *vcpu;
+       int cpu = smp_processor_id();
+
+       spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+       list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+                       blocked_vcpu_list) {
+               struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+               if (pi_test_on(pi_desc) == 1)
+                       kvm_vcpu_kick(vcpu);
+       }
+       spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
+void __init pi_init(int cpu)
+{
+       INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+       spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
+bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+       return pi_test_on(pi_desc) ||
+               (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
+}
+
+
+/*
+ * pi_update_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
+                  bool set)
+{
+       struct kvm_kernel_irq_routing_entry *e;
+       struct kvm_irq_routing_table *irq_rt;
+       struct kvm_lapic_irq irq;
+       struct kvm_vcpu *vcpu;
+       struct vcpu_data vcpu_info;
+       int idx, ret = 0;
+
+       if (!kvm_arch_has_assigned_device(kvm) ||
+           !irq_remapping_cap(IRQ_POSTING_CAP) ||
+           !kvm_vcpu_apicv_active(kvm->vcpus[0]))
+               return 0;
+
+       idx = srcu_read_lock(&kvm->irq_srcu);
+       irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+       if (guest_irq >= irq_rt->nr_rt_entries ||
+           hlist_empty(&irq_rt->map[guest_irq])) {
+               pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
+                            guest_irq, irq_rt->nr_rt_entries);
+               goto out;
+       }
+
+       hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+               if (e->type != KVM_IRQ_ROUTING_MSI)
+                       continue;
+               /*
+                * VT-d PI cannot support posting multicast/broadcast
+                * interrupts to a vCPU, so we still use interrupt
+                * remapping for these kinds of interrupts.
+                *
+                * For lowest-priority interrupts, we only support
+                * those with single CPU as the destination, e.g. user
+                * configures the interrupts via /proc/irq or uses
+                * irqbalance to make the interrupts single-CPU.
+                *
+                * We will support full lowest-priority interrupt later.
+                *
+                * In addition, we can only inject generic interrupts using
+                * the PI mechanism, refuse to route others through it.
+                */
+
+               kvm_set_msi_irq(kvm, e, &irq);
+               if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+                   !kvm_irq_is_postable(&irq)) {
+                       /*
+                        * Make sure the IRTE is in remapped mode if
+                        * we don't handle it in posted mode.
+                        */
+                       ret = irq_set_vcpu_affinity(host_irq, NULL);
+                       if (ret < 0) {
+                               printk(KERN_INFO
+                                  "failed to back to remapped mode, irq: %u\n",
+                                  host_irq);
+                               goto out;
+                       }
+
+                       continue;
+               }
+
+               vcpu_info.pi_desc_addr = __pa(&to_vmx(vcpu)->pi_desc);
+               vcpu_info.vector = irq.vector;
+
+               trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
+                               vcpu_info.vector, vcpu_info.pi_desc_addr, set);
+
+               if (set)
+                       ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
+               else
+                       ret = irq_set_vcpu_affinity(host_irq, NULL);
+
+               if (ret < 0) {
+                       printk(KERN_INFO "%s: failed to update PI IRTE\n",
+                                       __func__);
+                       goto out;
+               }
+       }
+
+       ret = 0;
+out:
+       srcu_read_unlock(&kvm->irq_srcu, idx);
+       return ret;
+}
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
new file mode 100644 (file)
index 0000000..e53b97f
--- /dev/null
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_POSTED_INTR_H
+#define __KVM_X86_VMX_POSTED_INTR_H
+
+#define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+       u32 pir[8];     /* Posted interrupt requested */
+       union {
+               struct {
+                               /* bit 256 - Outstanding Notification */
+                       u16     on      : 1,
+                               /* bit 257 - Suppress Notification */
+                               sn      : 1,
+                               /* bit 271:258 - Reserved */
+                               rsvd_1  : 14;
+                               /* bit 279:272 - Notification Vector */
+                       u8      nv;
+                               /* bit 287:280 - Reserved */
+                       u8      rsvd_2;
+                               /* bit 319:288 - Notification Destination */
+                       u32     ndst;
+               };
+               u64 control;
+       };
+       u32 rsvd[6];
+} __aligned(64);
+
+static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+       return test_and_set_bit(POSTED_INTR_ON,
+                       (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+       return test_and_clear_bit(POSTED_INTR_ON,
+                       (unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+       return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
+static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
+{
+       return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
+}
+
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+       set_bit(POSTED_INTR_SN,
+               (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_set_on(struct pi_desc *pi_desc)
+{
+       set_bit(POSTED_INTR_ON,
+               (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+       clear_bit(POSTED_INTR_ON,
+               (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+       clear_bit(POSTED_INTR_SN,
+               (unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_on(struct pi_desc *pi_desc)
+{
+       return test_bit(POSTED_INTR_ON,
+                       (unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_sn(struct pi_desc *pi_desc)
+{
+       return test_bit(POSTED_INTR_SN,
+                       (unsigned long *)&pi_desc->control);
+}
+
+void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
+void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
+int pi_pre_block(struct kvm_vcpu *vcpu);
+void pi_post_block(struct kvm_vcpu *vcpu);
+void pi_wakeup_handler(void);
+void __init pi_init(int cpu);
+bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
+int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
+                  bool set);
+
+#endif /* __KVM_X86_VMX_POSTED_INTR_H */
\ No newline at end of file
index b0ba1cc79e6a779eeca4411093819414231411c1..2cf068f452271c18b0c787ba15434035f707b5d0 100644 (file)
@@ -395,13 +395,6 @@ DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  */
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 
-/*
- * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
- * can find which vCPU should be waken up.
- */
-static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
-static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
-
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
 
@@ -1256,62 +1249,6 @@ static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
 }
 #endif
 
-static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
-{
-       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-       struct pi_desc old, new;
-       unsigned int dest;
-
-       /*
-        * In case of hot-plug or hot-unplug, we may have to undo
-        * vmx_vcpu_pi_put even if there is no assigned device.  And we
-        * always keep PI.NDST up to date for simplicity: it makes the
-        * code easier, and CPU migration is not a fast path.
-        */
-       if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
-               return;
-
-       /*
-        * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
-        * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
-        * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
-        * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
-        * correctly.
-        */
-       if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
-               pi_clear_sn(pi_desc);
-               goto after_clear_sn;
-       }
-
-       /* The full case.  */
-       do {
-               old.control = new.control = pi_desc->control;
-
-               dest = cpu_physical_id(cpu);
-
-               if (x2apic_enabled())
-                       new.ndst = dest;
-               else
-                       new.ndst = (dest << 8) & 0xFF00;
-
-               new.sn = 0;
-       } while (cmpxchg64(&pi_desc->control, old.control,
-                          new.control) != old.control);
-
-after_clear_sn:
-
-       /*
-        * Clear SN before reading the bitmap.  The VT-d firmware
-        * writes the bitmap and reads SN atomically (5.2.3 in the
-        * spec), so it doesn't really have a memory barrier that
-        * pairs with this, but we cannot do that and we need one.
-        */
-       smp_mb__after_atomic();
-
-       if (!pi_is_pir_empty(pi_desc))
-               pi_set_on(pi_desc);
-}
-
 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
                        struct loaded_vmcs *buddy)
 {
@@ -1395,20 +1332,6 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        vmx->host_debugctlmsr = get_debugctlmsr();
 }
 
-static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
-{
-       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
-       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
-               !kvm_vcpu_apicv_active(vcpu))
-               return;
-
-       /* Set SN when the vCPU is preempted */
-       if (vcpu->preempted)
-               pi_set_sn(pi_desc);
-}
-
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
        vmx_vcpu_pi_put(vcpu);
@@ -5408,25 +5331,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
        }
 }
 
-/*
- * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
- */
-static void wakeup_handler(void)
-{
-       struct kvm_vcpu *vcpu;
-       int cpu = smp_processor_id();
-
-       spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
-       list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
-                       blocked_vcpu_list) {
-               struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
-               if (pi_test_on(pi_desc) == 1)
-                       kvm_vcpu_kick(vcpu);
-       }
-       spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
-}
-
 static void vmx_enable_tdp(void)
 {
        kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
@@ -6283,14 +6187,6 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
        return max_irr;
 }
 
-static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
-{
-       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
-       return pi_test_on(pi_desc) ||
-               (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
-}
-
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
        if (!kvm_vcpu_apicv_active(vcpu))
@@ -7432,107 +7328,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
        kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
-static void __pi_post_block(struct kvm_vcpu *vcpu)
-{
-       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-       struct pi_desc old, new;
-       unsigned int dest;
-
-       do {
-               old.control = new.control = pi_desc->control;
-               WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
-                    "Wakeup handler not enabled while the VCPU is blocked\n");
-
-               dest = cpu_physical_id(vcpu->cpu);
-
-               if (x2apic_enabled())
-                       new.ndst = dest;
-               else
-                       new.ndst = (dest << 8) & 0xFF00;
-
-               /* set 'NV' to 'notification vector' */
-               new.nv = POSTED_INTR_VECTOR;
-       } while (cmpxchg64(&pi_desc->control, old.control,
-                          new.control) != old.control);
-
-       if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
-               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
-               list_del(&vcpu->blocked_vcpu_list);
-               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
-               vcpu->pre_pcpu = -1;
-       }
-}
-
-/*
- * This routine does the following things for vCPU which is going
- * to be blocked if VT-d PI is enabled.
- * - Store the vCPU to the wakeup list, so when interrupts happen
- *   we can find the right vCPU to wake up.
- * - Change the Posted-interrupt descriptor as below:
- *      'NDST' <-- vcpu->pre_pcpu
- *      'NV' <-- POSTED_INTR_WAKEUP_VECTOR
- * - If 'ON' is set during this process, which means at least one
- *   interrupt is posted for this vCPU, we cannot block it, in
- *   this case, return 1, otherwise, return 0.
- *
- */
-static int pi_pre_block(struct kvm_vcpu *vcpu)
-{
-       unsigned int dest;
-       struct pi_desc old, new;
-       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
-       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
-               !kvm_vcpu_apicv_active(vcpu))
-               return 0;
-
-       WARN_ON(irqs_disabled());
-       local_irq_disable();
-       if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
-               vcpu->pre_pcpu = vcpu->cpu;
-               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
-               list_add_tail(&vcpu->blocked_vcpu_list,
-                             &per_cpu(blocked_vcpu_on_cpu,
-                                      vcpu->pre_pcpu));
-               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
-       }
-
-       do {
-               old.control = new.control = pi_desc->control;
-
-               WARN((pi_desc->sn == 1),
-                    "Warning: SN field of posted-interrupts "
-                    "is set before blocking\n");
-
-               /*
-                * Since vCPU can be preempted during this process,
-                * vcpu->cpu could be different with pre_pcpu, we
-                * need to set pre_pcpu as the destination of wakeup
-                * notification event, then we can find the right vCPU
-                * to wakeup in wakeup handler if interrupts happen
-                * when the vCPU is in blocked state.
-                */
-               dest = cpu_physical_id(vcpu->pre_pcpu);
-
-               if (x2apic_enabled())
-                       new.ndst = dest;
-               else
-                       new.ndst = (dest << 8) & 0xFF00;
-
-               /* set 'NV' to 'wakeup vector' */
-               new.nv = POSTED_INTR_WAKEUP_VECTOR;
-       } while (cmpxchg64(&pi_desc->control, old.control,
-                          new.control) != old.control);
-
-       /* We should not block the vCPU if an interrupt is posted for it.  */
-       if (pi_test_on(pi_desc) == 1)
-               __pi_post_block(vcpu);
-
-       local_irq_enable();
-       return (vcpu->pre_pcpu == -1);
-}
-
 static int vmx_pre_block(struct kvm_vcpu *vcpu)
 {
        if (pi_pre_block(vcpu))
@@ -7544,17 +7339,6 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-static void pi_post_block(struct kvm_vcpu *vcpu)
-{
-       if (vcpu->pre_pcpu == -1)
-               return;
-
-       WARN_ON(irqs_disabled());
-       local_irq_disable();
-       __pi_post_block(vcpu);
-       local_irq_enable();
-}
-
 static void vmx_post_block(struct kvm_vcpu *vcpu)
 {
        if (kvm_x86_ops.set_hv_timer)
@@ -7563,100 +7347,6 @@ static void vmx_post_block(struct kvm_vcpu *vcpu)
        pi_post_block(vcpu);
 }
 
-/*
- * vmx_update_pi_irte - set IRTE for Posted-Interrupts
- *
- * @kvm: kvm
- * @host_irq: host irq of the interrupt
- * @guest_irq: gsi of the interrupt
- * @set: set or unset PI
- * returns 0 on success, < 0 on failure
- */
-static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
-                             uint32_t guest_irq, bool set)
-{
-       struct kvm_kernel_irq_routing_entry *e;
-       struct kvm_irq_routing_table *irq_rt;
-       struct kvm_lapic_irq irq;
-       struct kvm_vcpu *vcpu;
-       struct vcpu_data vcpu_info;
-       int idx, ret = 0;
-
-       if (!kvm_arch_has_assigned_device(kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP) ||
-               !kvm_vcpu_apicv_active(kvm->vcpus[0]))
-               return 0;
-
-       idx = srcu_read_lock(&kvm->irq_srcu);
-       irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-       if (guest_irq >= irq_rt->nr_rt_entries ||
-           hlist_empty(&irq_rt->map[guest_irq])) {
-               pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
-                            guest_irq, irq_rt->nr_rt_entries);
-               goto out;
-       }
-
-       hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
-               if (e->type != KVM_IRQ_ROUTING_MSI)
-                       continue;
-               /*
-                * VT-d PI cannot support posting multicast/broadcast
-                * interrupts to a vCPU, we still use interrupt remapping
-                * for these kind of interrupts.
-                *
-                * For lowest-priority interrupts, we only support
-                * those with single CPU as the destination, e.g. user
-                * configures the interrupts via /proc/irq or uses
-                * irqbalance to make the interrupts single-CPU.
-                *
-                * We will support full lowest-priority interrupt later.
-                *
-                * In addition, we can only inject generic interrupts using
-                * the PI mechanism, refuse to route others through it.
-                */
-
-               kvm_set_msi_irq(kvm, e, &irq);
-               if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
-                   !kvm_irq_is_postable(&irq)) {
-                       /*
-                        * Make sure the IRTE is in remapped mode if
-                        * we don't handle it in posted mode.
-                        */
-                       ret = irq_set_vcpu_affinity(host_irq, NULL);
-                       if (ret < 0) {
-                               printk(KERN_INFO
-                                  "failed to back to remapped mode, irq: %u\n",
-                                  host_irq);
-                               goto out;
-                       }
-
-                       continue;
-               }
-
-               vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
-               vcpu_info.vector = irq.vector;
-
-               trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
-                               vcpu_info.vector, vcpu_info.pi_desc_addr, set);
-
-               if (set)
-                       ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
-               else
-                       ret = irq_set_vcpu_affinity(host_irq, NULL);
-
-               if (ret < 0) {
-                       printk(KERN_INFO "%s: failed to update PI IRTE\n",
-                                       __func__);
-                       goto out;
-               }
-       }
-
-       ret = 0;
-out:
-       srcu_read_unlock(&kvm->irq_srcu, idx);
-       return ret;
-}
-
 static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 {
        if (vcpu->arch.mcg_cap & MCG_LMCE_P)
@@ -7820,7 +7510,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
        .sync_pir_to_irr = vmx_sync_pir_to_irr,
        .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
-       .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
+       .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
 
        .set_tss_addr = vmx_set_tss_addr,
        .set_identity_map_addr = vmx_set_identity_map_addr,
@@ -7854,7 +7544,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .pmu_ops = &intel_pmu_ops,
        .nested_ops = &vmx_nested_ops,
 
-       .update_pi_irte = vmx_update_pi_irte,
+       .update_pi_irte = pi_update_irte,
 
 #ifdef CONFIG_X86_64
        .set_hv_timer = vmx_set_hv_timer,
@@ -8020,7 +7710,7 @@ static __init int hardware_setup(void)
                vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
        }
 
-       kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+       kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
 
        kvm_mce_cap_supported |= MCG_LMCE_P;
 
@@ -8159,8 +7849,8 @@ static int __init vmx_init(void)
 
        for_each_possible_cpu(cpu) {
                INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
-               INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
-               spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+
+               pi_init(cpu);
        }
 
 #ifdef CONFIG_KEXEC_CORE
index 941336dc5ee4883f5c25d809a18d7dc244b0f921..e0a655c7a0feb4c6c95b07dc30abadf3cc6a81c8 100644 (file)
@@ -10,6 +10,7 @@
 #include "capabilities.h"
 #include "kvm_cache_regs.h"
 #include "ops.h"
+#include "posted_intr.h"
 #include "vmcs.h"
 #include "cpuid.h"
 
@@ -49,29 +50,6 @@ enum segment_cache_field {
        SEG_FIELD_NR = 4
 };
 
-/* Posted-Interrupt Descriptor */
-struct pi_desc {
-       u32 pir[8];     /* Posted interrupt requested */
-       union {
-               struct {
-                               /* bit 256 - Outstanding Notification */
-                       u16     on      : 1,
-                               /* bit 257 - Suppress Notification */
-                               sn      : 1,
-                               /* bit 271:258 - Reserved */
-                               rsvd_1  : 14;
-                               /* bit 279:272 - Notification Vector */
-                       u8      nv;
-                               /* bit 287:280 - Reserved */
-                       u8      rsvd_2;
-                               /* bit 319:288 - Notification Destination */
-                       u32     ndst;
-               };
-               u64 control;
-       };
-       u32 rsvd[6];
-} __aligned(64);
-
 #define RTIT_ADDR_RANGE                4
 
 struct pt_ctx {
@@ -356,67 +334,6 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
 int vmx_find_msr_index(struct vmx_msrs *m, u32 msr);
 void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
 
-#define POSTED_INTR_ON  0
-#define POSTED_INTR_SN  1
-
-static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
-{
-       return test_and_set_bit(POSTED_INTR_ON,
-                       (unsigned long *)&pi_desc->control);
-}
-
-static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
-{
-       return test_and_clear_bit(POSTED_INTR_ON,
-                       (unsigned long *)&pi_desc->control);
-}
-
-static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
-{
-       return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
-}
-
-static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
-{
-       return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
-}
-
-static inline void pi_set_sn(struct pi_desc *pi_desc)
-{
-       set_bit(POSTED_INTR_SN,
-               (unsigned long *)&pi_desc->control);
-}
-
-static inline void pi_set_on(struct pi_desc *pi_desc)
-{
-       set_bit(POSTED_INTR_ON,
-               (unsigned long *)&pi_desc->control);
-}
-
-static inline void pi_clear_on(struct pi_desc *pi_desc)
-{
-       clear_bit(POSTED_INTR_ON,
-               (unsigned long *)&pi_desc->control);
-}
-
-static inline void pi_clear_sn(struct pi_desc *pi_desc)
-{
-       clear_bit(POSTED_INTR_SN,
-               (unsigned long *)&pi_desc->control);
-}
-
-static inline int pi_test_on(struct pi_desc *pi_desc)
-{
-       return test_bit(POSTED_INTR_ON,
-                       (unsigned long *)&pi_desc->control);
-}
-
-static inline int pi_test_sn(struct pi_desc *pi_desc)
-{
-       return test_bit(POSTED_INTR_SN,
-                       (unsigned long *)&pi_desc->control);
-}
-
 static inline u8 vmx_get_rvi(void)
 {
        return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
@@ -497,11 +414,6 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
        return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
-static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
-{
-       return &(to_vmx(vcpu)->pi_desc);
-}
-
 static inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);