kvm: Rename make_all_cpus_request() to kvm_make_all_cpus_request() and make it non-static
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index db57363cc2876ed9cf06b123342f9fc3b1904a65..5b8ca365932ad08490725d1287587e5d1cc9d078 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
 
 #include <asm/processor.h>
 #include <asm/io.h>
+#include <asm/ioctl.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
 #include "coalesced_mmio.h"
 #include "async_pf.h"
+#include "vfio.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -151,7 +153,7 @@ static void ack_flush(void *_completed)
 {
 }
 
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
        int i, cpu, me;
        cpumask_var_t cpus;
@@ -188,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
        long dirty_count = kvm->tlbs_dirty;
 
        smp_mb();
-       if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+       if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.remote_tlb_flush;
        cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
@@ -196,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-       make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+       kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
 void kvm_make_mclock_inprogress_request(struct kvm *kvm)
 {
-       make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+       kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
 }
 
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
-       make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+       kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
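
The rename also drops the static qualifier, so code outside kvm_main.c can broadcast a request to every vCPU and learn whether any remote CPU had to be kicked. A minimal sketch of an out-of-file caller, assuming the declaration is exported from kvm_host.h as part of this change (the function name is illustrative, and KVM_REQ_TLB_FLUSH stands in for an arch-specific request bit):

/*
 * Sketch: broadcast a request with the now-exported helper.  The return
 * value is true when remote CPUs were actually sent an IPI, the same
 * pattern kvm_flush_remote_tlbs() uses above.
 */
static void example_broadcast_request(struct kvm *kvm)
{
        if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.remote_tlb_flush;
}
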
@@ -367,7 +369,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
                                              struct mm_struct *mm,
-                                             unsigned long address)
+                                             unsigned long start,
+                                             unsigned long end)
 {
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
        int young, idx;
@@ -375,7 +378,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
        idx = srcu_read_lock(&kvm->srcu);
        spin_lock(&kvm->mmu_lock);
 
-       young = kvm_age_hva(kvm, address);
+       young = kvm_age_hva(kvm, start, end);
        if (young)
                kvm_flush_remote_tlbs(kvm);
 
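
The notifier callback now hands kvm_age_hva() a [start, end) host-virtual range rather than a single address, so an architecture can age every page the range covers in one walk. A hedged sketch of an arch-side implementation under the new signature; the kvm_handle_hva_range()/kvm_age_rmapp() helpers mirror how the x86 MMU structures such walks, but the one-liner here is illustrative rather than verbatim:

/*
 * Illustrative arch implementation: walk the rmaps covering the hva range
 * and return nonzero if any page mapped in it was recently accessed.
 */
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
        return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
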
@@ -1121,6 +1124,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
        return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
 }
 
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+                        unsigned long addr, bool write_fault,
+                        struct page **pagep)
+{
+       int npages;
+       int locked = 1;
+       int flags = FOLL_TOUCH | FOLL_HWPOISON |
+                   (pagep ? FOLL_GET : 0) |
+                   (write_fault ? FOLL_WRITE : 0);
+
+       /*
+        * If retrying the fault, we get here *not* having allowed the filemap
+        * to wait on the page lock. We should now allow waiting on the IO with
+        * the mmap semaphore released.
+        */
+       down_read(&mm->mmap_sem);
+       npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
+                                 &locked);
+       if (!locked) {
+               VM_BUG_ON(npages != -EBUSY);
+
+               if (!pagep)
+                       return 0;
+
+               /*
+                * The previous call has now waited on the IO. Now we can
+                * retry and complete. Pass TRIED to ensure we do not re
+                * schedule async IO (see e.g. filemap_fault).
+                */
+               down_read(&mm->mmap_sem);
+               npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
+                                         pagep, NULL, NULL);
+       }
+       up_read(&mm->mmap_sem);
+       return npages;
+}
+
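
kvm_get_user_page_io() relies on __get_user_pages() dropping mmap_sem and clearing *locked (returning -EBUSY) when the fault has to sleep on I/O; the second call then retries with FOLL_TRIED once the I/O has completed. A minimal usage sketch with a hypothetical caller, error handling reduced to the essentials:

/*
 * Sketch: pin one page at a host virtual address for writing, letting the
 * fault path wait on disk I/O without mmap_sem held the whole time.  The
 * caller must not already hold mmap_sem and must put_page() the result.
 */
static struct page *example_pin_page(unsigned long hva)
{
        struct page *page;

        if (kvm_get_user_page_io(current, current->mm, hva,
                                 true /* write_fault */, &page) != 1)
                return NULL;
        return page;
}
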
 static inline int check_user_page_hwpoison(unsigned long addr)
 {
        int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@ -1183,9 +1223,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
                npages = get_user_page_nowait(current, current->mm,
                                              addr, write_fault, page);
                up_read(&current->mm->mmap_sem);
-       } else
-               npages = get_user_pages_fast(addr, 1, write_fault,
-                                            page);
+       } else {
+               /*
+                * By now we have tried gup_fast, and possibly async_pf, and we
+                * are certainly not atomic. Time to retry the gup, allowing
+                * mmap semaphore to be relinquished in the case of IO.
+                */
+               npages = kvm_get_user_page_io(current, current->mm, addr,
+                                             write_fault, page);
+       }
        if (npages != 1)
                return npages;
 
@@ -1988,6 +2034,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
        if (vcpu->kvm->mm != current->mm)
                return -EIO;
 
+       if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
+               return -EINVAL;
+
 #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
        /*
         * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
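
The added check rejects any vcpu ioctl whose type byte is not KVMIO (0xAE) before the command is decoded, so numbers belonging to other subsystems now fail cleanly with -EINVAL instead of being misinterpreted. A small illustration of the encoding the test relies on; the helper is hypothetical, but KVM_RUN really is _IO(KVMIO, 0x80) in the uapi header:

/* Every KVM command carries the KVMIO magic in its _IOC_TYPE byte. */
static bool is_kvm_ioctl(unsigned int ioctl)
{
        return _IOC_TYPE(ioctl) == KVMIO;       /* _IOC_TYPE(KVM_RUN) == 0xAE */
}
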
@@ -3226,6 +3275,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
                goto out_undebugfs;
        }
 
+       r = kvm_vfio_ops_init();
+       WARN_ON(r);
+
        return 0;
 
 out_undebugfs:
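
kvm_vfio_ops_init() comes from the vfio.h include added at the top of the file and hooks the VFIO pseudo-device into KVM's device framework during kvm_init(); the WARN_ON() lets initialization continue if registration fails, since VFIO support is optional. A sketch of what the registration plausibly looks like, assuming the kvm_register_device_ops() interface from the same series; treat the body as an assumption rather than the verbatim vfio.c code:

/* Assumed sketch: make KVM_DEV_TYPE_VFIO creatable via KVM_CREATE_DEVICE. */
int kvm_vfio_ops_init(void)
{
        return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
}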