thp: allow mlocked THP again
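A minimal userspace sketch of what this change re-enables (illustrative only, not part of the patch): with the VM_LOCKED fallbacks below removed, an mlock()ed anonymous mapping may again be backed by transparent huge pages instead of always falling back to small pages.

/*
 * Illustrative sketch, not part of the patch: back a region with THP
 * and mlock() it.  Whether huge pages are actually used still depends
 * on the sysfs THP settings and on memory availability; AnonHugePages
 * in /proc/<pid>/smaps shows the result for the locked VMA.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define LEN	(4UL << 20)	/* 4 MiB, a multiple of the common 2 MiB THP size */

int main(void)
{
	void *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* Hint that this range should use transparent huge pages. */
	if (madvise(p, LEN, MADV_HUGEPAGE))
		perror("madvise");
	/* Lock the range; faults here may now be served by huge pages. */
	if (mlock(p, LEN)) {
		perror("mlock");
		return 1;
	}
	memset(p, 0x5a, LEN);	/* touch the whole range */
	munlock(p, LEN);
	munmap(p, LEN);
	return 0;
}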
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4acf55b31f7c65774eedfa41ddd1f82e8295c535..f283cb7c480e15f6ca37ab7b8d1f602ede7edffc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -874,8 +874,6 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
        if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
                return VM_FAULT_FALLBACK;
-       if (vma->vm_flags & VM_LOCKED)
-               return VM_FAULT_FALLBACK;
        if (unlikely(anon_vma_prepare(vma)))
                return VM_FAULT_OOM;
        if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
@@ -1344,7 +1342,20 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                        update_mmu_cache_pmd(vma, addr, pmd);
        }
        if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
-               if (page->mapping && trylock_page(page)) {
+               /*
+                * We don't mlock() pte-mapped THPs. This way we can avoid
+                * leaking mlocked pages into non-VM_LOCKED VMAs.
+                *
+                * In most cases the pmd is the only mapping of the page as we
+                * break COW for the mlock() -- see gup_flags |= FOLL_WRITE for
+                * writable private mappings in populate_vma_page_range().
+                *
+                * The only scenario where the page can still be shared here
+                * is when mlocking a read-only mapping shared over fork().
+                * We skip mlocking such pages.
+                */
+               if (compound_mapcount(page) == 1 && !PageDoubleMap(page) &&
+                               page->mapping && trylock_page(page)) {
                        lru_add_drain();
                        if (page->mapping)
                                mlock_vma_page(page);
@@ -2209,8 +2220,6 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
        if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
            (vma->vm_flags & VM_NOHUGEPAGE))
                return false;
-       if (vma->vm_flags & VM_LOCKED)
-               return false;
        if (!vma->anon_vma || vma->vm_ops)
                return false;
        if (is_vma_temporary_stack(vma))
@@ -2851,14 +2860,28 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
        spinlock_t *ptl;
        struct mm_struct *mm = vma->vm_mm;
+       struct page *page = NULL;
        unsigned long haddr = address & HPAGE_PMD_MASK;
 
        mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
        ptl = pmd_lock(mm, pmd);
-       if (likely(pmd_trans_huge(*pmd)))
-               __split_huge_pmd_locked(vma, pmd, haddr, false);
+       if (unlikely(!pmd_trans_huge(*pmd)))
+               goto out;
+       page = pmd_page(*pmd);
+       __split_huge_pmd_locked(vma, pmd, haddr, false);
+       if (PageMlocked(page))
+               get_page(page);
+       else
+               page = NULL;
+out:
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
+       if (page) {
+               lock_page(page);
+               munlock_vma_page(page);
+               unlock_page(page);
+               put_page(page);
+       }
 }
 
 static void split_huge_pmd_address(struct vm_area_struct *vma,
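The comment added in the follow_trans_huge_pmd() hunk above singles out the one case in which the pmd-mapped THP can still be shared when FOLL_MLOCK sees it: a read-only mapping inherited across fork(), where mlock() does not break COW. A rough userspace sketch of that case (illustrative only, not part of the patch):

/*
 * Illustrative sketch, not part of the patch: a read-only private
 * mapping shared with the parent over fork().  The child's mlock()
 * succeeds, but because the mapping is not writable no COW break
 * happens, the compound page stays shared, and the FOLL_MLOCK path
 * above skips marking it mlocked.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define LEN	(2UL << 20)	/* one 2 MiB huge page */

int main(void)
{
	char *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	madvise(p, LEN, MADV_HUGEPAGE);
	memset(p, 0x5a, LEN);		/* populate, ideally with a THP */
	mprotect(p, LEN, PROT_READ);	/* make the mapping read-only */

	if (fork() == 0) {
		if (mlock(p, LEN))	/* succeeds; the shared THP itself is skipped */
			perror("mlock");
		_exit(0);
	}
	wait(NULL);
	munmap(p, LEN);
	return 0;
}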