kvm: Rename make_all_cpus_request() to kvm_make_all_cpus_request() and make it non-static
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index db57363cc2876ed9cf06b123342f9fc3b1904a65..5b8ca365932ad08490725d1287587e5d1cc9d078 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
 
 #include <asm/processor.h>
 #include <asm/io.h>
+#include <asm/ioctl.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
 #include "coalesced_mmio.h"
 #include "async_pf.h"
+#include "vfio.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -151,7 +153,7 @@ static void ack_flush(void *_completed)
 {
 }
 
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
        int i, cpu, me;
        cpumask_var_t cpus;
@@ -188,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
        long dirty_count = kvm->tlbs_dirty;
 
        smp_mb();
-       if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+       if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.remote_tlb_flush;
        cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
@@ -196,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-       make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+       kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
 void kvm_make_mclock_inprogress_request(struct kvm *kvm)
 {
-       make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+       kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
 }
 
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
-       make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+       kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
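
The rename also drops the static qualifier, so code outside kvm_main.c can broadcast a request to every vCPU and learn whether any remote CPU had to be kicked. A minimal sketch of an out-of-file caller, assuming the declaration is exported from kvm_host.h as part of this change (the function name is illustrative, and KVM_REQ_TLB_FLUSH stands in for an arch-specific request bit):

/*
 * Sketch: broadcast a request with the now-exported helper.  The return
 * value is true when remote CPUs were actually sent an IPI, the same
 * pattern kvm_flush_remote_tlbs() uses above.
 */
static void example_broadcast_request(struct kvm *kvm)
{
        if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.remote_tlb_flush;
}
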
@@ -367,7 +369,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
                                              struct mm_struct *mm,
-                                             unsigned long address)
+                                             unsigned long start,
+                                             unsigned long end)
 {
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
        int young, idx;
@@ -375,7 +378,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
        idx = srcu_read_lock(&kvm->srcu);
        spin_lock(&kvm->mmu_lock);
 
-       young = kvm_age_hva(kvm, address);
+       young = kvm_age_hva(kvm, start, end);
        if (young)
                kvm_flush_remote_tlbs(kvm);
 
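
The notifier callback now hands kvm_age_hva() a [start, end) host-virtual range rather than a single address, so an architecture can age every page the range covers in one walk. A hedged sketch of an arch-side implementation under the new signature; the kvm_handle_hva_range()/kvm_age_rmapp() helpers mirror how the x86 MMU structures such walks, but the one-liner here is illustrative rather than verbatim:

/*
 * Illustrative arch implementation: walk the rmaps covering the hva range
 * and return nonzero if any page mapped in it was recently accessed.
 */
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
        return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
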
@@ -1121,6 +1124,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
        return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
 }
 
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+                        unsigned long addr, bool write_fault,
+                        struct page **pagep)
+{
+       int npages;
+       int locked = 1;
+       int flags = FOLL_TOUCH | FOLL_HWPOISON |
+                   (pagep ? FOLL_GET : 0) |
+                   (write_fault ? FOLL_WRITE : 0);
+
+       /*
+        * If retrying the fault, we get here *not* having allowed the filemap
+        * to wait on the page lock. We should now allow waiting on the IO with
+        * the mmap semaphore released.
+        */
+       down_read(&mm->mmap_sem);
+       npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
+                                 &locked);
+       if (!locked) {
+               VM_BUG_ON(npages != -EBUSY);
+
+               if (!pagep)
+                       return 0;
+
+               /*
+                * The previous call has now waited on the IO. Now we can
+                * retry and complete. Pass TRIED to ensure we do not re
+                * schedule async IO (see e.g. filemap_fault).
+                */
+               down_read(&mm->mmap_sem);
+               npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
+                                         pagep, NULL, NULL);
+       }
+       up_read(&mm->mmap_sem);
+       return npages;
+}
+
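
kvm_get_user_page_io() relies on __get_user_pages() dropping mmap_sem and clearing *locked (returning -EBUSY) when the fault has to sleep on I/O; the second call then retries with FOLL_TRIED once the I/O has completed. A minimal usage sketch with a hypothetical caller, error handling reduced to the essentials:

/*
 * Sketch: pin one page at a host virtual address for writing, letting the
 * fault path wait on disk I/O without mmap_sem held the whole time.  The
 * caller must not already hold mmap_sem and must put_page() the result.
 */
static struct page *example_pin_page(unsigned long hva)
{
        struct page *page;

        if (kvm_get_user_page_io(current, current->mm, hva,
                                 true /* write_fault */, &page) != 1)
                return NULL;
        return page;
}
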
 static inline int check_user_page_hwpoison(unsigned long addr)
 {
        int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@ -1183,9 +1223,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
                npages = get_user_page_nowait(current, current->mm,
                                              addr, write_fault, page);
                up_read(&current->mm->mmap_sem);
-       } else
-               npages = get_user_pages_fast(addr, 1, write_fault,
-                                            page);
+       } else {
+               /*
+                * By now we have tried gup_fast, and possibly async_pf, and we
+                * are certainly not atomic. Time to retry the gup, allowing
+                * mmap semaphore to be relinquished in the case of IO.
+                */
+               npages = kvm_get_user_page_io(current, current->mm, addr,
+                                             write_fault, page);
+       }
        if (npages != 1)
                return npages;
 
@@ -1988,6 +2034,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
        if (vcpu->kvm->mm != current->mm)
                return -EIO;
 
+       if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
+               return -EINVAL;
+
 #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
        /*
         * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
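
The added check rejects any vcpu ioctl whose type byte is not KVMIO (0xAE) before the command is decoded, so numbers belonging to other subsystems now fail cleanly with -EINVAL instead of being misinterpreted. A small illustration of the encoding the test relies on; the helper is hypothetical, but KVM_RUN really is _IO(KVMIO, 0x80) in the uapi header:

/* Every KVM command carries the KVMIO magic in its _IOC_TYPE byte. */
static bool is_kvm_ioctl(unsigned int ioctl)
{
        return _IOC_TYPE(ioctl) == KVMIO;       /* _IOC_TYPE(KVM_RUN) == 0xAE */
}
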
@@ -3226,6 +3275,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
                goto out_undebugfs;
        }
 
+       r = kvm_vfio_ops_init();
+       WARN_ON(r);
+
        return 0;
 
 out_undebugfs:
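
kvm_vfio_ops_init() comes from the vfio.h include added at the top of the file and hooks the VFIO pseudo-device into KVM's device framework during kvm_init(); the WARN_ON() lets initialization continue if registration fails, since VFIO support is optional. A sketch of what the registration plausibly looks like, assuming the kvm_register_device_ops() interface from the same series; treat the body as an assumption rather than the verbatim vfio.c code:

/* Assumed sketch: make KVM_DEV_TYPE_VFIO creatable via KVM_CREATE_DEVICE. */
int kvm_vfio_ops_init(void)
{
        return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
}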