diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8b4eac0c9dcdb4704f75263d8ef64d1e34bd4a52..6404df613ea36a0783609b45df72fde12cf9c7ed 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -485,18 +485,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        __be64 *hptep;
        unsigned long mmu_seq, psize, pte_size;
        unsigned long gpa_base, gfn_base;
-       unsigned long gpa, gfn, hva, pfn;
+       unsigned long gpa, gfn, hva, pfn, hpa;
        struct kvm_memory_slot *memslot;
        unsigned long *rmap;
        struct revmap_entry *rev;
-       struct page *page, *pages[1];
-       long index, ret, npages;
+       struct page *page;
+       long index, ret;
        bool is_ci;
-       unsigned int writing, write_ok;
-       struct vm_area_struct *vma;
+       bool writing, write_ok;
+       unsigned int shift;
        unsigned long rcbits;
        long mmio_update;
-       struct mm_struct *mm;
+       pte_t pte, *ptep;
 
        if (kvm_is_radix(kvm))
                return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
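[Annotation] The switch from unsigned int to bool for writing/write_ok in this hunk lines the locals up with the generic KVM lookup helper the next hunk calls, whose writable out-parameter is a bool *. For reference, the prototype being matched (as it appears in include/linux/kvm_host.h around this kernel version; treat the exact spelling as an assumption rather than a quotation):

    kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
                                   bool atomic, bool *async,
                                   bool write_fault, bool *writable);

With write_ok already a bool, &write_ok can be passed straight through with no temporary.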
@@ -570,59 +570,62 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        smp_rmb();
 
        ret = -EFAULT;
-       is_ci = false;
-       pfn = 0;
        page = NULL;
-       mm = kvm->mm;
-       pte_size = PAGE_SIZE;
        writing = (dsisr & DSISR_ISSTORE) != 0;
        /* If writing != 0, then the HPTE must allow writing, if we get here */
        write_ok = writing;
        hva = gfn_to_hva_memslot(memslot, gfn);
-       npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages);
-       if (npages < 1) {
-               /* Check if it's an I/O mapping */
-               down_read(&mm->mmap_sem);
-               vma = find_vma(mm, hva);
-               if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
-                   (vma->vm_flags & VM_PFNMAP)) {
-                       pfn = vma->vm_pgoff +
-                               ((hva - vma->vm_start) >> PAGE_SHIFT);
-                       pte_size = psize;
-                       is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
-                       write_ok = vma->vm_flags & VM_WRITE;
-               }
-               up_read(&mm->mmap_sem);
-               if (!pfn)
-                       goto out_put;
+
+       /*
+        * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+        * do it with !atomic && !async, which is how we call it.
+        * We always ask for write permission since the common case
+        * is that the page is writable.
+        */
+       if (__get_user_pages_fast(hva, 1, 1, &page) == 1) {
+               write_ok = true;
        } else {
-               page = pages[0];
-               pfn = page_to_pfn(page);
-               if (PageHuge(page)) {
-                       page = compound_head(page);
-                       pte_size <<= compound_order(page);
-               }
-               /* if the guest wants write access, see if that is OK */
-               if (!writing && hpte_is_writable(r)) {
-                       pte_t *ptep, pte;
-                       unsigned long flags;
-                       /*
-                        * We need to protect against page table destruction
-                        * hugepage split and collapse.
-                        */
-                       local_irq_save(flags);
-                       ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL);
-                       if (ptep) {
-                               pte = kvmppc_read_update_linux_pte(ptep, 1);
-                               if (__pte_write(pte))
-                                       write_ok = 1;
-                       }
-                       local_irq_restore(flags);
+               /* Call KVM generic code to do the slow-path check */
+               pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+                                          writing, &write_ok);
+               if (is_error_noslot_pfn(pfn))
+                       return -EFAULT;
+               page = NULL;
+               if (pfn_valid(pfn)) {
+                       page = pfn_to_page(pfn);
+                       if (PageReserved(page))
+                               page = NULL;
                }
        }
 
+       /*
+        * Read the PTE from the process' radix tree and use that
+        * so we get the shift and attribute bits.
+        */
+       local_irq_disable();
+       ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+       /*
+        * If the PTE disappeared temporarily due to a THP
+        * collapse, just return and let the guest try again.
+        */
+       if (!ptep) {
+               local_irq_enable();
+               if (page)
+                       put_page(page);
+               return RESUME_GUEST;
+       }
+       pte = *ptep;
+       local_irq_enable();
+       hpa = pte_pfn(pte) << PAGE_SHIFT;
+       pte_size = PAGE_SIZE;
+       if (shift)
+               pte_size = 1ul << shift;
+       is_ci = pte_ci(pte);
+
        if (psize > pte_size)
                goto out_put;
+       if (pte_size > psize)
+               hpa |= hva & (pte_size - psize);
 
        /* Check WIMG vs. the actual page we're accessing */
        if (!hpte_cache_flags_ok(r, is_ci)) {
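[Annotation] The offset fold near the end of this hunk is the subtle part: pte_pfn(pte) << PAGE_SHIFT is aligned to the host mapping size (pte_size), while the HPTE maps only psize bytes, so the bits of hva between psize and pte_size must be folded back into hpa. A worked example with made-up numbers (16 MB host hugepage backing a 64 KB guest HPTE page; values are illustrative, not taken from the patch):

    unsigned long pte_size = 1ul << 24;    /* host PTE maps 16 MB      */
    unsigned long psize    = 1ul << 16;    /* HPTE covers 64 KB        */
    unsigned long hpa      = 0x40000000ul; /* pte_pfn() << PAGE_SHIFT,
                                              16 MB-aligned            */
    unsigned long hva      = 0x10234500ul; /* faulting host address    */

    /* pte_size - psize = 0xff0000: the offset bits between 64 KB and
     * 16 MB granularity. 0x10234500 & 0xff0000 = 0x230000.
     */
    hpa |= hva & (pte_size - psize);       /* hpa == 0x40230000 */

The bits of hva below psize are deliberately dropped, since the HPTE maps a whole psize-aligned block, and the preceding "if (psize > pte_size) goto out_put;" check guarantees the host mapping is at least that large.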
@@ -636,14 +639,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        }
 
        /*
-        * Set the HPTE to point to pfn.
-        * Since the pfn is at PAGE_SIZE granularity, make sure we
+        * Set the HPTE to point to hpa.
+        * Since the hpa is at PAGE_SIZE granularity, make sure we
         * don't mask out lower-order bits if psize < PAGE_SIZE.
         */
        if (psize < PAGE_SIZE)
                psize = PAGE_SIZE;
-       r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
-                                       ((pfn << PAGE_SHIFT) & ~(psize - 1));
+       r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
        if (hpte_is_writable(r) && !write_ok)
                r = hpte_make_readonly(r);
        ret = RESUME_GUEST;
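[Annotation] The rewritten composition of r leans on psize-dependent mask algebra that is worth spelling out. A minimal standalone check in plain userspace C, assuming HPTE_R_PP0 = 0x8000000000000000ul as defined in arch/powerpc/include/asm/book3s/64/mmu-hash.h:

    #include <stdio.h>

    int main(void)
    {
            unsigned long pp0   = 0x8000000000000000ul; /* HPTE_R_PP0 */
            unsigned long psize = 0x1000ul;             /* 4 KB pages */

            /* Prints 0x7ffffffffffff000: every bit between PP0 and
             * the page size. */
            printf("%#lx\n", pp0 - psize);
            /* Prints 0x8000000000000fff: PP0 plus the attribute bits
             * below the page size. */
            printf("%#lx\n", ~(pp0 - psize));
            return 0;
    }

So r & ~(HPTE_R_PP0 - psize) preserves PP0 and the attribute bits below the page size, r & HPTE_R_KEY_HI preserves the high key bits, and hpa is psize-aligned in every case that survives the psize > pte_size check, so OR-ing it in supplies the real page number without clobbering either.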
@@ -708,20 +710,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        asm volatile("ptesync" : : : "memory");
        preempt_enable();
        if (page && hpte_is_writable(r))
-               SetPageDirty(page);
+               set_page_dirty_lock(page);
 
  out_put:
        trace_kvm_page_fault_exit(vcpu, hpte, ret);
 
-       if (page) {
-               /*
-                * We drop pages[0] here, not page because page might
-                * have been set to the head page of a compound, but
-                * we have to drop the reference on the correct tail
-                * page to match the get inside gup()
-                */
-               put_page(pages[0]);
-       }
+       if (page)
+               put_page(page);
        return ret;
 
  out_unlock:
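[Annotation] The two cleanups at the tail are related: both lookup paths now yield the precise page that was pinned (or NULL for reserved/MMIO pfns), so a single put_page(page) balances the reference with no compound-head bookkeeping, and set_page_dirty_lock() replaces SetPageDirty() so a possibly file-backed page is dirtied through its address_space with the page lock held rather than by flipping the flag directly. A minimal, hypothetical sketch of that pin/use/dirty/release pattern; use_pinned_page() is an invented stand-in for the HPTE update above, and the slow path is elided:

    #include <linux/mm.h>

    extern int use_pinned_page(struct page *page); /* invented stand-in */

    static int touch_guest_page(unsigned long hva, bool writing)
    {
            struct page *page;
            int ret;

            /* Pin one page; request write access when storing. */
            if (__get_user_pages_fast(hva, 1, writing, &page) != 1)
                    return -EFAULT;

            ret = use_pinned_page(page);

            if (writing)
                    set_page_dirty_lock(page); /* tells the filesystem,
                                                  unlike SetPageDirty() */
            put_page(page);                    /* drop the gup pin */
            return ret;
    }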