diff --git a/mm/memory.c b/mm/memory.c
index e158f7ac67300b10b8827fe6825667506095f550..969c5bf31997f812c0214fe4601b35d391642107 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -68,6 +68,7 @@
 #include <linux/debugfs.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/dax.h>
+#include <linux/oom.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -1675,7 +1676,7 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 EXPORT_SYMBOL(vm_insert_page);
 
 static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
-                       pfn_t pfn, pgprot_t prot)
+                       pfn_t pfn, pgprot_t prot, bool mkwrite)
 {
        struct mm_struct *mm = vma->vm_mm;
        int retval;
@@ -1687,14 +1688,35 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
        if (!pte)
                goto out;
        retval = -EBUSY;
-       if (!pte_none(*pte))
-               goto out_unlock;
+       if (!pte_none(*pte)) {
+               if (mkwrite) {
+                       /*
+                        * For read faults on private mappings the PFN passed
+                        * in may not match the PFN we have mapped if the
+                        * mapped PFN is a writeable COW page.  In the mkwrite
+                        * case we are creating a writable PTE for a shared
+                        * mapping and we expect the PFNs to match.
+                        */
+                       if (WARN_ON_ONCE(pte_pfn(*pte) != pfn_t_to_pfn(pfn)))
+                               goto out_unlock;
+                       entry = *pte;
+                       goto out_mkwrite;
+               } else
+                       goto out_unlock;
+       }
 
        /* Ok, finally just insert the thing.. */
        if (pfn_t_devmap(pfn))
                entry = pte_mkdevmap(pfn_t_pte(pfn, prot));
        else
                entry = pte_mkspecial(pfn_t_pte(pfn, prot));
+
+out_mkwrite:
+       if (mkwrite) {
+               entry = pte_mkyoung(entry);
+               entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+       }
+
        set_pte_at(mm, addr, pte, entry);
        update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
 
@@ -1765,14 +1787,15 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 
        track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
 
-       ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot);
+       ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
+                       false);
 
        return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn_prot);
 
-int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
-                       pfn_t pfn)
+static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+                       pfn_t pfn, bool mkwrite)
 {
        pgprot_t pgprot = vma->vm_page_prot;
 
@@ -1801,10 +1824,24 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
                page = pfn_to_page(pfn_t_to_pfn(pfn));
                return insert_page(vma, addr, page, pgprot);
        }
-       return insert_pfn(vma, addr, pfn, pgprot);
+       return insert_pfn(vma, addr, pfn, pgprot, mkwrite);
+}
+
+int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+                       pfn_t pfn)
+{
+       return __vm_insert_mixed(vma, addr, pfn, false);
+
 }
 EXPORT_SYMBOL(vm_insert_mixed);
 
+int vm_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr,
+                       pfn_t pfn)
+{
+       return __vm_insert_mixed(vma, addr, pfn, true);
+}
+EXPORT_SYMBOL(vm_insert_mixed_mkwrite);
+
 /*
  * maps a range of physical memory into the requested pages. the old
  * mappings are removed. any references to nonexistent pages results
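Note: vm_insert_mixed_mkwrite() above is exported so that a PFN-backed fault handler can re-insert an already-mapped PFN with write permission instead of getting -EBUSY back. A caller-side sketch, not part of this patch (the helper name and fault-handler shape are illustrative assumptions):

static int example_insert_pfn(struct vm_fault *vmf, pfn_t pfn, bool write)
{
	int error;

	/*
	 * On a write fault, re-inserting the same PFN with mkwrite
	 * semantics lets insert_pfn() upgrade the existing PTE to
	 * young/dirty/writable instead of failing with -EBUSY.
	 */
	if (write)
		error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
	else
		error = vm_insert_mixed(vmf->vma, vmf->address, pfn);

	/* the surrounding fault handler would map this to a VM_FAULT_* code */
	return error;
}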
@@ -2893,6 +2930,7 @@ static int do_anonymous_page(struct vm_fault *vmf)
        struct vm_area_struct *vma = vmf->vma;
        struct mem_cgroup *memcg;
        struct page *page;
+       int ret = 0;
        pte_t entry;
 
        /* File mapping without ->vm_ops ? */
@@ -2925,6 +2963,9 @@ static int do_anonymous_page(struct vm_fault *vmf)
                                vmf->address, &vmf->ptl);
                if (!pte_none(*vmf->pte))
                        goto unlock;
+               ret = check_stable_address_space(vma->vm_mm);
+               if (ret)
+                       goto unlock;
                /* Deliver the page fault to userland, check inside PT lock */
                if (userfaultfd_missing(vma)) {
                        pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2959,6 +3000,10 @@ static int do_anonymous_page(struct vm_fault *vmf)
        if (!pte_none(*vmf->pte))
                goto release;
 
+       ret = check_stable_address_space(vma->vm_mm);
+       if (ret)
+               goto release;
+
        /* Deliver the page fault to userland, check inside PT lock */
        if (userfaultfd_missing(vma)) {
                pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2978,7 +3023,7 @@ setpte:
        update_mmu_cache(vma, vmf->address, vmf->pte);
 unlock:
        pte_unmap_unlock(vmf->pte, vmf->ptl);
-       return 0;
+       return ret;
 release:
        mem_cgroup_cancel_charge(page, memcg, false);
        put_page(page);
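Note: do_anonymous_page() above (and finish_fault() below) now call check_stable_address_space() under the page table lock before establishing a PTE; the new <linux/oom.h> include at the top of the file pulls it in. The helper itself is not part of this file. Based on the comment removed from handle_mm_fault() further down, it is expected to look roughly like this (a sketch, not the authoritative definition):

static inline int check_stable_address_space(struct mm_struct *mm)
{
	/*
	 * Once the oom reaper has started tearing down this address
	 * space (MMF_UNSTABLE is set), an anonymous refault could
	 * silently return a zero page, so fail the fault instead.
	 */
	if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags)))
		return VM_FAULT_SIGBUS;
	return 0;
}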
@@ -3252,7 +3297,7 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 int finish_fault(struct vm_fault *vmf)
 {
        struct page *page;
-       int ret;
+       int ret = 0;
 
        /* Did we COW the page? */
        if ((vmf->flags & FAULT_FLAG_WRITE) &&
@@ -3260,7 +3305,15 @@ int finish_fault(struct vm_fault *vmf)
                page = vmf->cow_page;
        else
                page = vmf->page;
-       ret = alloc_set_pte(vmf, vmf->memcg, page);
+
+       /*
+        * check even for read faults because we might have lost our CoWed
+        * page
+        */
+       if (!(vmf->vma->vm_flags & VM_SHARED))
+               ret = check_stable_address_space(vmf->vma->vm_mm);
+       if (!ret)
+               ret = alloc_set_pte(vmf, vmf->memcg, page);
        if (vmf->pte)
                pte_unmap_unlock(vmf->pte, vmf->ptl);
        return ret;
@@ -3871,6 +3924,11 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
        /* do counter updates before entering really critical section. */
        check_sync_rss_stat(current);
 
+       if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
+                                           flags & FAULT_FLAG_INSTRUCTION,
+                                           flags & FAULT_FLAG_REMOTE))
+               return VM_FAULT_SIGSEGV;
+
        /*
         * Enable the memcg OOM handling for faults triggered in user
         * space.  Kernel faults are handled more gracefully.
@@ -3878,11 +3936,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
        if (flags & FAULT_FLAG_USER)
                mem_cgroup_oom_enable();
 
-       if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
-                                           flags & FAULT_FLAG_INSTRUCTION,
-                                           flags & FAULT_FLAG_REMOTE))
-               return VM_FAULT_SIGSEGV;
-
        if (unlikely(is_vm_hugetlb_page(vma)))
                ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
        else
@@ -3900,19 +3953,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
                        mem_cgroup_oom_synchronize(false);
        }
 
-       /*
-        * This mm has been already reaped by the oom reaper and so the
-        * refault cannot be trusted in general. Anonymous refaults would
-        * lose data and give a zero page instead e.g. This is especially
-        * problem for use_mm() because regular tasks will just die and
-        * the corrupted data will not be visible anywhere while kthread
-        * will outlive the oom victim and potentially propagate the date
-        * further.
-        */
-       if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR)
-                               && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags)))
-               ret = VM_FAULT_SIGBUS;
-
        return ret;
 }
 EXPORT_SYMBOL_GPL(handle_mm_fault);
@@ -4004,7 +4044,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 #endif /* __PAGETABLE_PMD_FOLDED */
 
 static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
-               pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
+                           unsigned long *start, unsigned long *end,
+                           pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
        pgd_t *pgd;
        p4d_t *p4d;
@@ -4031,17 +4072,29 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
                if (!pmdpp)
                        goto out;
 
+               if (start && end) {
+                       *start = address & PMD_MASK;
+                       *end = *start + PMD_SIZE;
+                       mmu_notifier_invalidate_range_start(mm, *start, *end);
+               }
                *ptlp = pmd_lock(mm, pmd);
                if (pmd_huge(*pmd)) {
                        *pmdpp = pmd;
                        return 0;
                }
                spin_unlock(*ptlp);
+               if (start && end)
+                       mmu_notifier_invalidate_range_end(mm, *start, *end);
        }
 
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                goto out;
 
+       if (start && end) {
+               *start = address & PAGE_MASK;
+               *end = *start + PAGE_SIZE;
+               mmu_notifier_invalidate_range_start(mm, *start, *end);
+       }
        ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
        if (!pte_present(*ptep))
                goto unlock;
@@ -4049,6 +4102,8 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
        return 0;
 unlock:
        pte_unmap_unlock(ptep, *ptlp);
+       if (start && end)
+               mmu_notifier_invalidate_range_end(mm, *start, *end);
 out:
        return -EINVAL;
 }
@@ -4060,20 +4115,21 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address,
 
        /* (void) is needed to make gcc happy */
        (void) __cond_lock(*ptlp,
-                          !(res = __follow_pte_pmd(mm, address, ptepp, NULL,
-                                          ptlp)));
+                          !(res = __follow_pte_pmd(mm, address, NULL, NULL,
+                                                   ptepp, NULL, ptlp)));
        return res;
 }
 
 int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+                            unsigned long *start, unsigned long *end,
                             pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
        int res;
 
        /* (void) is needed to make gcc happy */
        (void) __cond_lock(*ptlp,
-                          !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
-                                          ptlp)));
+                          !(res = __follow_pte_pmd(mm, address, start, end,
+                                                   ptepp, pmdpp, ptlp)));
        return res;
 }
 EXPORT_SYMBOL(follow_pte_pmd);
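Note: with the extra start/end parameters, __follow_pte_pmd() now starts an mmu_notifier invalidation range before taking the page table lock, and a successful follow_pte_pmd() caller becomes responsible for ending that range after dropping the lock. A caller sketch under those assumptions (variable setup is illustrative; mm and address are taken to be in scope):

	unsigned long start, end;
	pte_t *ptep = NULL;
	pmd_t *pmdp = NULL;
	spinlock_t *ptl;

	if (!follow_pte_pmd(mm, address, &start, &end, &ptep, &pmdp, &ptl)) {
		if (pmdp) {
			/* huge PMD mapping: update *pmdp, then drop the pmd lock */
			spin_unlock(ptl);
		} else {
			/* regular PTE mapping: update *ptep, then unmap and unlock */
			pte_unmap_unlock(ptep, ptl);
		}
		/* close the notifier range that __follow_pte_pmd() opened */
		mmu_notifier_invalidate_range_end(mm, start, end);
	}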