git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - mm/memory.c
UBUNTU: Ubuntu-4.15.0-96.97
[mirror_ubuntu-bionic-kernel.git] / mm / memory.c
index f3329558cc851807df765125a71e002cc44b0c1c..9a779c0d31b5a06d6146f6a245b4bf07d57003b8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1805,14 +1805,21 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
                         * in may not match the PFN we have mapped if the
                         * mapped PFN is a writeable COW page.  In the mkwrite
                         * case we are creating a writable PTE for a shared
-                        * mapping and we expect the PFNs to match.
+                        * mapping and we expect the PFNs to match. If they
+                        * don't match, we are likely racing with block
+                        * allocation and mapping invalidation so just skip the
+                        * update.
                         */
-                       if (WARN_ON_ONCE(pte_pfn(*pte) != pfn_t_to_pfn(pfn)))
+                       if (pte_pfn(*pte) != pfn_t_to_pfn(pfn)) {
+                               WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte)));
                                goto out_unlock;
-                       entry = *pte;
-                       goto out_mkwrite;
-               } else
-                       goto out_unlock;
+                       }
+                       entry = pte_mkyoung(*pte);
+                       entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+                       if (ptep_set_access_flags(vma, addr, pte, entry, 1))
+                               update_mmu_cache(vma, addr, pte);
+               }
+               goto out_unlock;
        }
 
        /* Ok, finally just insert the thing.. */
@@ -1821,7 +1828,6 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
        else
                entry = pte_mkspecial(pfn_t_pte(pfn, prot));
 
-out_mkwrite:
        if (mkwrite) {
                entry = pte_mkyoung(entry);
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
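Taken together, the two hunks above drop the out_mkwrite fallthrough and instead update the already-present PTE in place, warning only when the mismatching PFN is not the zero page. A minimal sketch of that in-place update, pulled into a hypothetical helper (the helper name is ours, not part of the patch):

static void upgrade_present_pte(struct vm_area_struct *vma,
				unsigned long addr, pte_t *pte)
{
	pte_t entry = pte_mkyoung(*pte);	/* mark the PTE accessed */

	/* Dirty it, and make it writable only if the VMA permits writes. */
	entry = maybe_mkwrite(pte_mkdirty(entry), vma);

	/* ptep_set_access_flags() returns nonzero only if the PTE actually
	 * changed, and only then does the MMU cache need updating. */
	if (ptep_set_access_flags(vma, addr, pte, entry, 1))
		update_mmu_cache(vma, addr, pte);
}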
@@ -3237,6 +3243,29 @@ static int __do_fault(struct vm_fault *vmf)
        struct vm_area_struct *vma = vmf->vma;
        int ret;
 
+       /*
+        * Preallocate the pte before we take the page lock; allocating it later
+        * can deadlock against memcg reclaim, which waits for pages under writeback:
+        *                              lock_page(A)
+        *                              SetPageWriteback(A)
+        *                              unlock_page(A)
+        * lock_page(B)
+        *                              lock_page(B)
+        * pte_alloc_one
+        *   shrink_page_list
+        *     wait_on_page_writeback(A)
+        *                              SetPageWriteback(B)
+        *                              unlock_page(B)
+        *                              # flush A, B to clear the writeback
+        */
+       if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) {
+               vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
+                                                 vmf->address);
+               if (!vmf->prealloc_pte)
+                       return VM_FAULT_OOM;
+               smp_wmb(); /* See comment in __pte_alloc() */
+       }
+
        ret = vma->vm_ops->fault(vmf);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
                            VM_FAULT_DONE_COW)))
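The hunk above is about ordering rather than new behaviour: pte_alloc_one() can enter memcg reclaim, and reclaim may wait for writeback on a page that another faulting thread has locked, so the page-table allocation has to happen before ->fault() takes any page lock. A condensed sketch of that ordering, using the same calls as the patch under a hypothetical function name:

static int fault_with_prealloc(struct vm_fault *vmf)
{
	/* Step 1: allocate while nothing is locked.  pte_alloc_one() may
	 * sleep and recurse into memcg reclaim, which can wait for
	 * writeback on pages locked by other faulting threads; with no
	 * page lock held here, that wait cannot deadlock us. */
	if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) {
		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm,
						  vmf->address);
		if (!vmf->prealloc_pte)
			return VM_FAULT_OOM;
		smp_wmb();	/* see the comment in __pte_alloc() */
	}

	/* Step 2: only now let the filesystem lock pages; no allocation
	 * that can enter reclaim is allowed past this point. */
	return vmf->vma->vm_ops->fault(vmf);
}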
@@ -3737,10 +3766,13 @@ static int do_shared_fault(struct vm_fault *vmf)
  * but allow concurrent faults).
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
+ * If mmap_sem is released, vma may become invalid (for example
+ * by another thread calling munmap()).
  */
 static int do_fault(struct vm_fault *vmf)
 {
        struct vm_area_struct *vma = vmf->vma;
+       struct mm_struct *vm_mm = vma->vm_mm;
        int ret;
 
        /*
@@ -3781,7 +3813,7 @@ static int do_fault(struct vm_fault *vmf)
 
        /* preallocated pagetable is unused: free it */
        if (vmf->prealloc_pte) {
-               pte_free(vma->vm_mm, vmf->prealloc_pte);
+               pte_free(vm_mm, vmf->prealloc_pte);
                vmf->prealloc_pte = NULL;
        }
        return ret;
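Caching vm_mm up front matters because the handlers do_fault() dispatches to (do_read_fault(), do_cow_fault(), do_shared_fault()) may drop mmap_sem, after which a concurrent munmap() can free the vma; the mm_struct outlives it, so only the saved pointer is safe during cleanup. A simplified sketch of the lifetime issue (the function name and the direct ->fault() call are illustrative):

static int do_fault_sketch(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct mm_struct *vm_mm = vma->vm_mm;	/* read while vma is still valid */
	int ret;

	/* May release mmap_sem; once it is dropped, a concurrent munmap()
	 * can free 'vma', so it must not be dereferenced afterwards. */
	ret = vma->vm_ops->fault(vmf);

	/* Cleanup uses the cached mm_struct, never vma->vm_mm. */
	if (vmf->prealloc_pte) {
		pte_free(vm_mm, vmf->prealloc_pte);
		vmf->prealloc_pte = NULL;
	}
	return ret;
}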
@@ -4457,7 +4489,9 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
        void *old_buf = buf;
        int write = gup_flags & FOLL_WRITE;
 
-       down_read(&mm->mmap_sem);
+       if (down_read_killable(&mm->mmap_sem))
+               return 0;
+
        /* ignore errors, just check how much was successfully transferred */
        while (len) {
                int bytes, ret, offset;
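With down_read_killable(), a task hit by a fatal signal while waiting for mmap_sem no longer sleeps uninterruptibly; __access_remote_vm() just reports zero bytes transferred, which callers already treat as a failed or short access. A hypothetical caller-side wrapper, purely illustrative:

static int write_remote_buf(struct mm_struct *mm, unsigned long addr,
			    void *buf, int len)
{
	/* A fatal signal while the remote mm's mmap_sem is contended now
	 * shows up here as a zero-length transfer rather than an
	 * unkillable sleep inside __access_remote_vm(). */
	int copied = access_remote_vm(mm, addr, buf, len, FOLL_WRITE);

	return copied == len ? 0 : -EIO;
}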