if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
return VM_FAULT_FALLBACK;
- if (vma->vm_flags & VM_LOCKED)
- return VM_FAULT_FALLBACK;
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
update_mmu_cache_pmd(vma, addr, pmd);
}
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
- if (page->mapping && trylock_page(page)) {
+ /*
+ * We don't mlock() pte-mapped THPs. This way we can avoid
+ * leaking mlocked pages into non-VM_LOCKED VMAs.
+ *
+ * In most cases the pmd is the only mapping of the page as we
+ * break COW for the mlock() -- see gup_flags |= FOLL_WRITE for
+ * writable private mappings in populate_vma_page_range().
+ *
+ * The only scenario when we have the page shared here is if we
+ * mlocking read-only mapping shared over fork(). We skip
+ * mlocking such pages.
+ */
+ if (compound_mapcount(page) == 1 && !PageDoubleMap(page) &&
+ page->mapping && trylock_page(page)) {
lru_add_drain();
if (page->mapping)
mlock_vma_page(page);
if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
(vma->vm_flags & VM_NOHUGEPAGE))
return false;
- if (vma->vm_flags & VM_LOCKED)
- return false;
if (!vma->anon_vma || vma->vm_ops)
return false;
if (is_vma_temporary_stack(vma))
{
spinlock_t *ptl;
struct mm_struct *mm = vma->vm_mm;
+ struct page *page = NULL;
unsigned long haddr = address & HPAGE_PMD_MASK;
mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
ptl = pmd_lock(mm, pmd);
- if (likely(pmd_trans_huge(*pmd)))
- __split_huge_pmd_locked(vma, pmd, haddr, false);
+ if (unlikely(!pmd_trans_huge(*pmd)))
+ goto out;
+ page = pmd_page(*pmd);
+ __split_huge_pmd_locked(vma, pmd, haddr, false);
+ if (PageMlocked(page))
+ get_page(page);
+ else
+ page = NULL;
+out:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
+ if (page) {
+ lock_page(page);
+ munlock_vma_page(page);
+ unlock_page(page);
+ put_page(page);
+ }
}
static void split_huge_pmd_address(struct vm_area_struct *vma,