mmap locking API: use coccinelle to convert mmap_sem rwsem call sites
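For readers following the hunks below: the conversion replaces direct rwsem calls on current->mm->mmap_sem (down_read()/up_read()) with the dedicated mmap locking helpers (mmap_read_lock()/mmap_read_unlock()). A minimal sketch of the read-side wrappers, assuming they simply forward to the rwsem primitives as the initial include/linux/mmap_lock.h did; the exact definitions may differ:

	/* Sketch only: read-side mmap locking wrappers around the mm rwsem. */
	static inline void mmap_read_lock(struct mm_struct *mm)
	{
		down_read(&mm->mmap_sem);
	}

	static inline void mmap_read_unlock(struct mm_struct *mm)
	{
		up_read(&mm->mmap_sem);
	}
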
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 731c1e517716f8f26c736bcdb3527fdb1a7743ff..0dfee7576e88c1bcee31707aaafe76f5552ada92 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -55,7 +55,6 @@
 #include <asm/processor.h>
 #include <asm/ioctl.h>
 #include <linux/uaccess.h>
-#include <asm/pgtable.h>
 
 #include "coalesced_mmio.h"
 #include "async_pf.h"
@@ -349,7 +348,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
        vcpu->kvm = kvm;
        vcpu->vcpu_id = id;
        vcpu->pid = NULL;
-       init_swait_queue_head(&vcpu->wq);
+       rcuwait_init(&vcpu->wait);
        kvm_async_pf_vcpu_init(vcpu);
 
        vcpu->pre_pcpu = -1;
@@ -718,6 +717,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
                        goto out_err_no_arch_destroy_vm;
        }
 
+       kvm->max_halt_poll_ns = halt_poll_ns;
+
        r = kvm_arch_init_vm(kvm, type);
        if (r)
                goto out_err_no_arch_destroy_vm;
@@ -1223,10 +1224,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
        if (mem->guest_phys_addr & (PAGE_SIZE - 1))
                return -EINVAL;
        /* We can read the guest memory with __xxx_user() later on. */
-       if ((id < KVM_USER_MEM_SLOTS) &&
-           ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
+       if ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
             !access_ok((void __user *)(unsigned long)mem->userspace_addr,
-                       mem->memory_size)))
+                       mem->memory_size))
                return -EINVAL;
        if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
                return -EINVAL;
@@ -1610,16 +1610,13 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
 {
        return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
 
 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
        struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
 
-       if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS ||
-             memslot->flags & KVM_MEMSLOT_INVALID)
-               return false;
-
-       return true;
+       return kvm_is_visible_memslot(memslot);
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
@@ -1634,7 +1631,7 @@ unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
        if (kvm_is_error_hva(addr))
                return PAGE_SIZE;
 
-       down_read(&current->mm->mmap_sem);
+       mmap_read_lock(current->mm);
        vma = find_vma(current->mm, addr);
        if (!vma)
                goto out;
@@ -1642,7 +1639,7 @@ unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
        size = vma_kernel_pagesize(vma);
 
 out:
-       up_read(&current->mm->mmap_sem);
+       mmap_read_unlock(current->mm);
 
        return size;
 }
@@ -1742,7 +1739,6 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
                            bool *writable, kvm_pfn_t *pfn)
 {
        struct page *page[1];
-       int npages;
 
        /*
         * Fast pin a writable pfn only if it is a write fault request
@@ -1752,8 +1748,7 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
        if (!(write_fault || writable))
                return false;
 
-       npages = __get_user_pages_fast(addr, 1, 1, page);
-       if (npages == 1) {
+       if (get_user_page_fast_only(addr, FOLL_WRITE, page)) {
                *pfn = page_to_pfn(page[0]);
 
                if (writable)
@@ -1793,7 +1788,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
        if (unlikely(!write_fault) && writable) {
                struct page *wpage;
 
-               if (__get_user_pages_fast(addr, 1, 1, &wpage) == 1) {
+               if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) {
                        *writable = true;
                        put_page(page);
                        page = wpage;
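The two hunks above also pick up the reworked fast-GUP interface: the old __get_user_pages_fast(addr, 1, 1, page) call, whose third argument was a bare "write" flag, becomes get_user_page_fast_only(addr, FOLL_WRITE, page), which takes gup_flags instead. A sketch of how the single-page helper plausibly relates to the multi-page get_user_pages_fast_only() used later in gfn_to_page_many_atomic(); this is an assumption about its definition, roughly as declared in include/linux/mm.h:

	/* Assumed shape of the helper: pin exactly one page without sleeping. */
	static inline bool get_user_page_fast_only(unsigned long addr,
						   unsigned int gup_flags,
						   struct page **pagep)
	{
		return get_user_pages_fast_only(addr, 1, gup_flags, pagep) == 1;
	}
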
@@ -1897,7 +1892,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
        if (npages == 1)
                return pfn;
 
-       down_read(&current->mm->mmap_sem);
+       mmap_read_lock(current->mm);
        if (npages == -EHWPOISON ||
              (!async && check_user_page_hwpoison(addr))) {
                pfn = KVM_PFN_ERR_HWPOISON;
@@ -1921,7 +1916,7 @@ retry:
                pfn = KVM_PFN_ERR_FAULT;
        }
 exit:
-       up_read(&current->mm->mmap_sem);
+       mmap_read_unlock(current->mm);
        return pfn;
 }
 
@@ -2005,7 +2000,7 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
        if (entry < nr_pages)
                return 0;
 
-       return __get_user_pages_fast(addr, nr_pages, 1, pages);
+       return get_user_pages_fast_only(addr, nr_pages, FOLL_WRITE, pages);
 }
 EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
 
@@ -2511,13 +2506,15 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
 
-int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
-                          void *data, unsigned long len)
+int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+                                void *data, unsigned int offset,
+                                unsigned long len)
 {
        struct kvm_memslots *slots = kvm_memslots(kvm);
        int r;
+       gpa_t gpa = ghc->gpa + offset;
 
-       BUG_ON(len > ghc->len);
+       BUG_ON(len + offset > ghc->len);
 
        if (slots->generation != ghc->generation) {
                if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
@@ -2528,14 +2525,21 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                return -EFAULT;
 
        if (unlikely(!ghc->memslot))
-               return kvm_read_guest(kvm, ghc->gpa, data, len);
+               return kvm_read_guest(kvm, gpa, data, len);
 
-       r = __copy_from_user(data, (void __user *)ghc->hva, len);
+       r = __copy_from_user(data, (void __user *)ghc->hva + offset, len);
        if (r)
                return -EFAULT;
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_read_guest_offset_cached);
+
+int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+                         void *data, unsigned long len)
+{
+       return kvm_read_guest_offset_cached(kvm, ghc, data, 0, len);
+}
 EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
 
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
@@ -2673,19 +2677,27 @@ out:
        return ret;
 }
 
+static inline void
+update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited)
+{
+       if (waited)
+               vcpu->stat.halt_poll_fail_ns += poll_ns;
+       else
+               vcpu->stat.halt_poll_success_ns += poll_ns;
+}
+
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  */
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
-       ktime_t start, cur;
-       DECLARE_SWAITQUEUE(wait);
+       ktime_t start, cur, poll_end;
        bool waited = false;
        u64 block_ns;
 
        kvm_arch_vcpu_blocking(vcpu);
 
-       start = cur = ktime_get();
+       start = cur = poll_end = ktime_get();
        if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
                ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
 
@@ -2701,12 +2713,13 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
                                        ++vcpu->stat.halt_poll_invalid;
                                goto out;
                        }
-                       cur = ktime_get();
+                       poll_end = cur = ktime_get();
                } while (single_task_running() && ktime_before(cur, stop));
        }
 
+       prepare_to_rcuwait(&vcpu->wait);
        for (;;) {
-               prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+               set_current_state(TASK_INTERRUPTIBLE);
 
                if (kvm_vcpu_check_block(vcpu) < 0)
                        break;
@@ -2714,25 +2727,28 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
                waited = true;
                schedule();
        }
-
-       finish_swait(&vcpu->wq, &wait);
+       finish_rcuwait(&vcpu->wait);
        cur = ktime_get();
 out:
        kvm_arch_vcpu_unblocking(vcpu);
        block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
 
+       update_halt_poll_stats(
+               vcpu, ktime_to_ns(ktime_sub(poll_end, start)), waited);
+
        if (!kvm_arch_no_poll(vcpu)) {
                if (!vcpu_valid_wakeup(vcpu)) {
                        shrink_halt_poll_ns(vcpu);
-               } else if (halt_poll_ns) {
+               } else if (vcpu->kvm->max_halt_poll_ns) {
                        if (block_ns <= vcpu->halt_poll_ns)
                                ;
                        /* we had a long block, shrink polling */
-                       else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+                       else if (vcpu->halt_poll_ns &&
+                                       block_ns > vcpu->kvm->max_halt_poll_ns)
                                shrink_halt_poll_ns(vcpu);
                        /* we had a short halt and our poll time is too small */
-                       else if (vcpu->halt_poll_ns < halt_poll_ns &&
-                               block_ns < halt_poll_ns)
+                       else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
+                                       block_ns < vcpu->kvm->max_halt_poll_ns)
                                grow_halt_poll_ns(vcpu);
                } else {
                        vcpu->halt_poll_ns = 0;
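The block/wakeup rework above drops the per-vCPU swait queue in favour of an rcuwait, which tracks the single waiting task through an RCU-protected pointer. A rough sketch of the prepare/finish helpers called here, written from include/linux/rcuwait.h as an assumption:

	/* Sketch: record current as the one waiter; clear it when done. */
	static inline void prepare_to_rcuwait(struct rcuwait *w)
	{
		rcu_assign_pointer(w->task, current);
	}

	static inline void finish_rcuwait(struct rcuwait *w)
	{
		rcu_assign_pointer(w->task, NULL);
		__set_current_state(TASK_RUNNING);
	}
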
@@ -2746,11 +2762,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 {
-       struct swait_queue_head *wqp;
+       struct rcuwait *waitp;
 
-       wqp = kvm_arch_vcpu_wq(vcpu);
-       if (swq_has_sleeper(wqp)) {
-               swake_up_one(wqp);
+       waitp = kvm_arch_vcpu_get_wait(vcpu);
+       if (rcuwait_wake_up(waitp)) {
                WRITE_ONCE(vcpu->ready, true);
                ++vcpu->stat.halt_wakeup;
                return true;
@@ -2807,7 +2822,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
  *
  *  (a) VCPU which has not done pl-exit or cpu relax intercepted recently
  *  (preempted lock holder), indicated by @in_spin_loop.
- *  Set at the beiginning and cleared at the end of interception/PLE handler.
+ *  Set at the beginning and cleared at the end of interception/PLE handler.
  *
  *  (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
  *  chance last time (mostly it has become eligible now since we have probably
@@ -2892,7 +2907,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
                                continue;
                        if (vcpu == me)
                                continue;
-                       if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
+                       if (rcuwait_active(&vcpu->wait) &&
+                           !vcpu_dy_runnable(vcpu))
                                continue;
                        if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
                                !kvm_arch_vcpu_in_kernel(vcpu))
@@ -3039,8 +3055,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
        if (r)
                goto vcpu_free_run_page;
 
-       kvm_create_vcpu_debugfs(vcpu);
-
        mutex_lock(&kvm->lock);
        if (kvm_get_vcpu_by_id(kvm, id)) {
                r = -EEXIST;
@@ -3069,11 +3083,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 
        mutex_unlock(&kvm->lock);
        kvm_arch_vcpu_postcreate(vcpu);
+       kvm_create_vcpu_debugfs(vcpu);
        return r;
 
 unlock_vcpu_destroy:
        mutex_unlock(&kvm->lock);
-       debugfs_remove_recursive(vcpu->debugfs_dentry);
        kvm_arch_vcpu_destroy(vcpu);
 vcpu_free_run_page:
        free_page((unsigned long)vcpu->run);
@@ -3143,7 +3157,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
                                synchronize_rcu();
                        put_pid(oldpid);
                }
-               r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
+               r = kvm_arch_vcpu_ioctl_run(vcpu);
                trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
                break;
        }
@@ -3168,7 +3182,6 @@ out_free1:
        case KVM_SET_REGS: {
                struct kvm_regs *kvm_regs;
 
-               r = -ENOMEM;
                kvm_regs = memdup_user(argp, sizeof(*kvm_regs));
                if (IS_ERR(kvm_regs)) {
                        r = PTR_ERR(kvm_regs);
@@ -3524,6 +3537,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
        case KVM_CAP_IOEVENTFD_ANY_LENGTH:
        case KVM_CAP_CHECK_EXTENSION_VM:
        case KVM_CAP_ENABLE_CAP_VM:
+       case KVM_CAP_HALT_POLL:
                return 1;
 #ifdef CONFIG_KVM_MMIO
        case KVM_CAP_COALESCED_MMIO:
@@ -3574,6 +3588,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
                return 0;
        }
 #endif
+       case KVM_CAP_HALT_POLL: {
+               if (cap->flags || cap->args[0] != (unsigned int)cap->args[0])
+                       return -EINVAL;
+
+               kvm->max_halt_poll_ns = cap->args[0];
+               return 0;
+       }
        default:
                return kvm_vm_ioctl_enable_cap(kvm, cap);
        }
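With KVM_CAP_HALT_POLL accepted here, userspace can cap halt polling per VM via KVM_ENABLE_CAP on the VM file descriptor; args[0] is the maximum poll time in nanoseconds. A hypothetical userspace snippet (vm_fd and the 100 us value are illustrative):

	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_HALT_POLL,
		.args = { 100000 },	/* 100 us max halt-poll window */
	};

	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
		perror("KVM_ENABLE_CAP(KVM_CAP_HALT_POLL)");
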
@@ -4647,6 +4668,7 @@ struct kvm_vcpu *kvm_get_running_vcpu(void)
 
        return vcpu;
 }
+EXPORT_SYMBOL_GPL(kvm_get_running_vcpu);
 
 /**
  * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.