return ret;
}
-/*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
- address &= PAGE_MASK;
- if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
- struct vm_area_struct *prev = vma->vm_prev;
-
- /*
- * Is there a mapping abutting this one below?
- *
- * That's only ok if it's the same stack mapping
- * that has gotten split..
- */
- if (prev && prev->vm_end == address)
- return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
- return expand_downwards(vma, address - PAGE_SIZE);
- }
- if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
- struct vm_area_struct *next = vma->vm_next;
-
- /* As VM_GROWSDOWN but s/below/above/ */
- if (next && next->vm_start == address + PAGE_SIZE)
- return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
- return expand_upwards(vma, address + PAGE_SIZE);
- }
- return 0;
-}
-
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
if (vma->vm_flags & VM_SHARED)
return VM_FAULT_SIGBUS;
- /* Check if we need to add a guard page to the stack */
- if (check_stack_guard_page(vma, vmf->address) < 0)
- return VM_FAULT_SIGSEGV;
-
/*
* Use pte_alloc() instead of pte_alloc_map(). We can't run
* pte_offset_map() on pmds where a huge pmd might be created
ret = 0;
count_vm_event(THP_FILE_MAPPED);
out:
- /*
- * If we are going to fallback to pte mapping, do a
- * withdraw with pmd lock held.
- */
- if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
- vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
- vmf->pmd);
spin_unlock(vmf->ptl);
return ret;
}
ret = do_set_pmd(vmf, page);
if (ret != VM_FAULT_FALLBACK)
- goto fault_handled;
+ return ret;
}
if (!vmf->pte) {
ret = pte_alloc_one_map(vmf);
if (ret)
- goto fault_handled;
+ return ret;
}
/* Re-check under ptl */
- if (unlikely(!pte_none(*vmf->pte))) {
- ret = VM_FAULT_NOPAGE;
- goto fault_handled;
- }
+ if (unlikely(!pte_none(*vmf->pte)))
+ return VM_FAULT_NOPAGE;
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, vmf->address, vmf->pte);
- ret = 0;
-fault_handled:
- /* preallocated pagetable is unused: free it */
- if (vmf->prealloc_pte) {
- pte_free(vmf->vma->vm_mm, vmf->prealloc_pte);
- vmf->prealloc_pte = 0;
- }
- return ret;
+ return 0;
}
static int do_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
+ int ret;
/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
if (!vma->vm_ops->fault)
- return VM_FAULT_SIGBUS;
- if (!(vmf->flags & FAULT_FLAG_WRITE))
- return do_read_fault(vmf);
- if (!(vma->vm_flags & VM_SHARED))
- return do_cow_fault(vmf);
- return do_shared_fault(vmf);
+ ret = VM_FAULT_SIGBUS;
+ else if (!(vmf->flags & FAULT_FLAG_WRITE))
+ ret = do_read_fault(vmf);
+ else if (!(vma->vm_flags & VM_SHARED))
+ ret = do_cow_fault(vmf);
+ else
+ ret = do_shared_fault(vmf);
+
+ /* preallocated pagetable is unused: free it */
+ if (vmf->prealloc_pte) {
+ pte_free(vma->vm_mm, vmf->prealloc_pte);
+ vmf->prealloc_pte = 0;
+ }
+ return ret;
}
static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
int last_cpupid;
int target_nid;
bool migrated = false;
- pte_t pte = vmf->orig_pte;
- bool was_writable = pte_write(pte);
+ pte_t pte;
+ bool was_writable = pte_savedwrite(vmf->orig_pte);
int flags = 0;
/*
- * The "pte" at this point cannot be used safely without
- * validation through pte_unmap_same(). It's of NUMA type but
- * the pfn may be screwed if the read is non atomic.
- *
- * We can safely just do a "set_pte_at()", because the old
- * page table entry is not accessible, so there would be no
- * concurrent hardware modifications to the PTE.
- */
+ * The "pte" at this point cannot be used safely without
+ * validation through pte_unmap_same(). It's of NUMA type but
+ * the pfn may be screwed if the read is non atomic.
+ */
vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd);
spin_lock(vmf->ptl);
- if (unlikely(!pte_same(*vmf->pte, pte))) {
+ if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
}
- /* Make it present again */
+ /*
+ * Make it present again, Depending on how arch implementes non
+ * accessible ptes, some can allow access by kernel mode.
+ */
+ pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte);
pte = pte_modify(pte, vma->vm_page_prot);
pte = pte_mkyoung(pte);
if (was_writable)
pte = pte_mkwrite(pte);
- set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
+ ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte);
update_mmu_cache(vma, vmf->address, vmf->pte);
page = vm_normal_page(vma, vmf->address, pte);
}
#endif /* __PAGETABLE_PMD_FOLDED */
-static int __follow_pte(struct mm_struct *mm, unsigned long address,
- pte_t **ptepp, spinlock_t **ptlp)
+static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
{
pgd_t *pgd;
pud_t *pud;
pmd = pmd_offset(pud, address);
VM_BUG_ON(pmd_trans_huge(*pmd));
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- goto out;
- /* We cannot handle huge page PFN maps. Luckily they don't exist. */
- if (pmd_huge(*pmd))
+ if (pmd_huge(*pmd)) {
+ if (!pmdpp)
+ goto out;
+
+ *ptlp = pmd_lock(mm, pmd);
+ if (pmd_huge(*pmd)) {
+ *pmdpp = pmd;
+ return 0;
+ }
+ spin_unlock(*ptlp);
+ }
+
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
goto out;
ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
return -EINVAL;
}
-int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
- spinlock_t **ptlp)
+static inline int follow_pte(struct mm_struct *mm, unsigned long address,
+ pte_t **ptepp, spinlock_t **ptlp)
+{
+ int res;
+
+ /* (void) is needed to make gcc happy */
+ (void) __cond_lock(*ptlp,
+ !(res = __follow_pte_pmd(mm, address, ptepp, NULL,
+ ptlp)));
+ return res;
+}
+
+int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+ pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
{
int res;
/* (void) is needed to make gcc happy */
(void) __cond_lock(*ptlp,
- !(res = __follow_pte(mm, address, ptepp, ptlp)));
+ !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
+ ptlp)));
return res;
}
+EXPORT_SYMBOL(follow_pte_pmd);
/**
* follow_pfn - look up PFN at a user virtual address