diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0b51e70e0a8b5918e4de281a7d379eb4ce8c4323..269b5df58543e44d6a283c6268036f010aad37c2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -928,6 +928,25 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
        ret = -EAGAIN;
        pmd = *src_pmd;
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+       if (unlikely(is_swap_pmd(pmd))) {
+               swp_entry_t entry = pmd_to_swp_entry(pmd);
+
+               VM_BUG_ON(!is_pmd_migration_entry(pmd));
+               if (is_write_migration_entry(entry)) {
+                       make_migration_entry_read(&entry);
+                       pmd = swp_entry_to_pmd(entry);
+                       if (pmd_swp_soft_dirty(*src_pmd))
+                               pmd = pmd_swp_mksoft_dirty(pmd);
+                       set_pmd_at(src_mm, addr, src_pmd, pmd);
+               }
+               set_pmd_at(dst_mm, addr, dst_pmd, pmd);
+               ret = 0;
+               goto out_unlock;
+       }
+#endif
+
        if (unlikely(!pmd_trans_huge(pmd))) {
                pte_free(dst_mm, pgtable);
                goto out_unlock;
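
The hunk above makes copy_huge_pmd() safe to run while the THP is under migration: a non-present swap PMD is copied to the child as-is, except that a writable migration entry is first downgraded to read-only so neither process can write until migration completes. Below is a minimal userspace model of that swp_entry_t round trip; the helper names mirror the kernel ones, but the bit layout (offset in the low bits, type in the high bits) and the SWP_MIGRATION_* values are illustrative assumptions, not the kernel's real encoding.

#include <assert.h>
#include <stdbool.h>

typedef struct { unsigned long val; } swp_entry_t;

#define SWP_MIGRATION_READ      30UL    /* illustrative "swap type" values */
#define SWP_MIGRATION_WRITE     31UL
#define SWP_TYPE_SHIFT          58      /* illustrative split: offset low, type high */

static swp_entry_t swp_entry(unsigned long type, unsigned long offset)
{
        swp_entry_t e = { (type << SWP_TYPE_SHIFT) | offset };
        return e;
}

static unsigned long swp_type(swp_entry_t e)   { return e.val >> SWP_TYPE_SHIFT; }
static unsigned long swp_offset(swp_entry_t e) { return e.val & ((1UL << SWP_TYPE_SHIFT) - 1); }

static bool is_write_migration_entry(swp_entry_t e)
{
        return swp_type(e) == SWP_MIGRATION_WRITE;
}

static void make_migration_entry_read(swp_entry_t *e)
{
        *e = swp_entry(SWP_MIGRATION_READ, swp_offset(*e));
}

int main(void)
{
        /* parent maps the THP with a write migration entry for pfn 0x1234 */
        swp_entry_t entry = swp_entry(SWP_MIGRATION_WRITE, 0x1234);

        /* what the copy path does before installing the child's PMD */
        if (is_write_migration_entry(entry))
                make_migration_entry_read(&entry);

        assert(swp_type(entry) == SWP_MIGRATION_READ);
        assert(swp_offset(entry) == 0x1234);    /* target pfn is preserved */
        return 0;
}
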
@@ -1599,6 +1618,12 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
        if (is_huge_zero_pmd(orig_pmd))
                goto out;
 
+       if (unlikely(!pmd_present(orig_pmd))) {
+               VM_BUG_ON(thp_migration_supported() &&
+                                 !is_pmd_migration_entry(orig_pmd));
+               goto out;
+       }
+
        page = pmd_page(orig_pmd);
        /*
         * If other processes are mapping this page, we couldn't discard
@@ -1684,10 +1709,24 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                spin_unlock(ptl);
                tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
        } else {
-               struct page *page = pmd_page(orig_pmd);
-               page_remove_rmap(page, true);
-               VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
-               VM_BUG_ON_PAGE(!PageHead(page), page);
+               struct page *page = NULL;
+               int flush_needed = 1;
+
+               if (pmd_present(orig_pmd)) {
+                       page = pmd_page(orig_pmd);
+                       page_remove_rmap(page, true);
+                       VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
+                       VM_BUG_ON_PAGE(!PageHead(page), page);
+               } else if (thp_migration_supported()) {
+                       swp_entry_t entry;
+
+                       VM_BUG_ON(!is_pmd_migration_entry(orig_pmd));
+                       entry = pmd_to_swp_entry(orig_pmd);
+                       page = pfn_to_page(swp_offset(entry));
+                       flush_needed = 0;
+               } else
+                       WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
+
                if (PageAnon(page)) {
                        zap_deposited_table(tlb->mm, pmd);
                        add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
@@ -1696,8 +1735,10 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                                zap_deposited_table(tlb->mm, pmd);
                        add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
                }
+
                spin_unlock(ptl);
-               tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
+               if (flush_needed)
+                       tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
        }
        return 1;
 }
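
With THP migration enabled, zap_huge_pmd() can now see a non-present PMD: the page is then recovered from the migration entry's pfn rather than from pmd_page(), and no deferred TLB flush is queued because the entry was never a hardware mapping. A minimal userspace model of that dispatch follows; the tagged-struct pmd_t and the mem_map[] stand-in for pfn_to_page() are assumptions for illustration only.

#include <assert.h>
#include <stdbool.h>

struct page { unsigned long pfn; };

/* toy "mem_map": pages indexed by pfn, standing in for pfn_to_page() */
static struct page mem_map[16];

/* toy pmd: either a present huge mapping or a migration entry */
typedef struct {
        bool present;
        unsigned long pfn;      /* in both cases, the pfn of the mapped THP */
} pmd_t;

/*
 * Model of the new zap logic: return the page backing the PMD and say
 * whether the caller still owes a TLB flush for it.
 */
static struct page *zap_target(pmd_t pmd, bool *flush_needed)
{
        if (pmd.present) {
                *flush_needed = true;           /* hardware may have cached the mapping */
                return &mem_map[pmd.pfn];       /* pmd_page() */
        }
        *flush_needed = false;                  /* migration entry: never in the TLB */
        return &mem_map[pmd.pfn];               /* pfn_to_page(swp_offset(entry)) */
}

int main(void)
{
        bool flush;
        pmd_t present   = { .present = true,  .pfn = 3 };
        pmd_t migrating = { .present = false, .pfn = 3 };

        assert(zap_target(present, &flush) == &mem_map[3] && flush);
        assert(zap_target(migrating, &flush) == &mem_map[3] && !flush);
        return 0;
}
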
@@ -1717,6 +1758,17 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
 }
 #endif
 
+static pmd_t move_soft_dirty_pmd(pmd_t pmd)
+{
+#ifdef CONFIG_MEM_SOFT_DIRTY
+       if (unlikely(is_pmd_migration_entry(pmd)))
+               pmd = pmd_swp_mksoft_dirty(pmd);
+       else if (pmd_present(pmd))
+               pmd = pmd_mksoft_dirty(pmd);
+#endif
+       return pmd;
+}
+
 bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                  unsigned long new_addr, unsigned long old_end,
                  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
@@ -1759,7 +1811,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                        pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
                        pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
                }
-               set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
+               pmd = move_soft_dirty_pmd(pmd);
+               set_pmd_at(mm, new_addr, new_pmd, pmd);
                if (new_ptl != old_ptl)
                        spin_unlock(new_ptl);
                if (force_flush)
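
move_soft_dirty_pmd(), added above, replaces the unconditional pmd_mksoft_dirty() call so that mremap() marks the destination PMD soft-dirty through the correct encoding: the swap-style bit for a migration entry, the hardware-style bit for a present PMD, and no change at all when CONFIG_MEM_SOFT_DIRTY is off. A small userspace model of that three-way dispatch is below; the flag-based pmd_t and the bit names are illustrative assumptions, not the real page-table format.

#include <assert.h>

/* illustrative flag bits for a toy pmd_t */
#define PMD_PRESENT             (1u << 0)
#define PMD_MIGRATION_ENTRY     (1u << 1)       /* non-present, under migration */
#define PMD_SOFT_DIRTY          (1u << 2)       /* soft-dirty bit of a present pmd */
#define PMD_SWP_SOFT_DIRTY      (1u << 3)       /* soft-dirty bit of a swap/migration pmd */

typedef unsigned int pmd_t;

#define CONFIG_MEM_SOFT_DIRTY 1

static pmd_t move_soft_dirty_pmd(pmd_t pmd)
{
#ifdef CONFIG_MEM_SOFT_DIRTY
        if (pmd & PMD_MIGRATION_ENTRY)
                pmd |= PMD_SWP_SOFT_DIRTY;      /* pmd_swp_mksoft_dirty() */
        else if (pmd & PMD_PRESENT)
                pmd |= PMD_SOFT_DIRTY;          /* pmd_mksoft_dirty() */
#endif
        return pmd;
}

int main(void)
{
        assert(move_soft_dirty_pmd(PMD_PRESENT) & PMD_SOFT_DIRTY);
        assert(move_soft_dirty_pmd(PMD_MIGRATION_ENTRY) & PMD_SWP_SOFT_DIRTY);
        assert(move_soft_dirty_pmd(0) == 0);    /* neither present nor migrating: untouched */
        return 0;
}
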
@@ -1794,6 +1847,27 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
        preserve_write = prot_numa && pmd_write(*pmd);
        ret = 1;
 
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+       if (is_swap_pmd(*pmd)) {
+               swp_entry_t entry = pmd_to_swp_entry(*pmd);
+
+               VM_BUG_ON(!is_pmd_migration_entry(*pmd));
+               if (is_write_migration_entry(entry)) {
+                       pmd_t newpmd;
+                       /*
+                        * A protection check is difficult so
+                        * just be safe and disable write
+                        */
+                       make_migration_entry_read(&entry);
+                       newpmd = swp_entry_to_pmd(entry);
+                       if (pmd_swp_soft_dirty(*pmd))
+                               newpmd = pmd_swp_mksoft_dirty(newpmd);
+                       set_pmd_at(mm, addr, pmd, newpmd);
+               }
+               goto unlock;
+       }
+#endif
+
        /*
         * Avoid trapping faults against the zero page. The read-only
         * data is likely to be read-cached on the local CPU and
@@ -1859,7 +1933,8 @@ spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
 {
        spinlock_t *ptl;
        ptl = pmd_lock(vma->vm_mm, pmd);
-       if (likely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
+       if (likely(is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) ||
+                       pmd_devmap(*pmd)))
                return ptl;
        spin_unlock(ptl);
        return NULL;
@@ -1977,14 +2052,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        struct page *page;
        pgtable_t pgtable;
        pmd_t _pmd;
-       bool young, write, dirty, soft_dirty;
+       bool young, write, dirty, soft_dirty, pmd_migration = false;
        unsigned long addr;
        int i;
 
        VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
        VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
        VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
-       VM_BUG_ON(!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd));
+       VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
+                               && !pmd_devmap(*pmd));
 
        count_vm_event(THP_SPLIT_PMD);
 
@@ -2009,7 +2085,16 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                return __split_huge_zero_page_pmd(vma, haddr, pmd);
        }
 
-       page = pmd_page(*pmd);
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+       pmd_migration = is_pmd_migration_entry(*pmd);
+       if (pmd_migration) {
+               swp_entry_t entry;
+
+               entry = pmd_to_swp_entry(*pmd);
+               page = pfn_to_page(swp_offset(entry));
+       } else
+#endif
+               page = pmd_page(*pmd);
        VM_BUG_ON_PAGE(!page_count(page), page);
        page_ref_add(page, HPAGE_PMD_NR - 1);
        write = pmd_write(*pmd);
@@ -2028,7 +2113,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                 * transferred to avoid any possibility of altering
                 * permissions across VMAs.
                 */
-               if (freeze) {
+               if (freeze || pmd_migration) {
                        swp_entry_t swp_entry;
                        swp_entry = make_migration_entry(page + i, write);
                        entry = swp_entry_to_pte(swp_entry);
@@ -2127,7 +2212,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                page = pmd_page(*pmd);
                if (PageMlocked(page))
                        clear_page_mlock(page);
-       } else if (!pmd_devmap(*pmd))
+       } else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
                goto out;
        __split_huge_pmd_locked(vma, pmd, haddr, freeze);
 out:
@@ -2210,7 +2295,7 @@ static void freeze_page(struct page *page)
        VM_BUG_ON_PAGE(!PageHead(page), page);
 
        if (PageAnon(page))
-               ttu_flags |= TTU_MIGRATION;
+               ttu_flags |= TTU_SPLIT_FREEZE;
 
        unmap_success = try_to_unmap(page, ttu_flags);
        VM_BUG_ON_PAGE(!unmap_success, page);
@@ -2745,3 +2830,66 @@ static int __init split_huge_pages_debugfs(void)
 }
 late_initcall(split_huge_pages_debugfs);
 #endif
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
+               struct page *page)
+{
+       struct vm_area_struct *vma = pvmw->vma;
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long address = pvmw->address;
+       pmd_t pmdval;
+       swp_entry_t entry;
+       pmd_t pmdswp;
+
+       if (!(pvmw->pmd && !pvmw->pte))
+               return;
+
+       mmu_notifier_invalidate_range_start(mm, address,
+                       address + HPAGE_PMD_SIZE);
+
+       flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
+       pmdval = *pvmw->pmd;
+       pmdp_invalidate(vma, address, pvmw->pmd);
+       if (pmd_dirty(pmdval))
+               set_page_dirty(page);
+       entry = make_migration_entry(page, pmd_write(pmdval));
+       pmdswp = swp_entry_to_pmd(entry);
+       if (pmd_soft_dirty(pmdval))
+               pmdswp = pmd_swp_mksoft_dirty(pmdswp);
+       set_pmd_at(mm, address, pvmw->pmd, pmdswp);
+       page_remove_rmap(page, true);
+       put_page(page);
+
+       mmu_notifier_invalidate_range_end(mm, address,
+                       address + HPAGE_PMD_SIZE);
+}
+
+void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
+{
+       struct vm_area_struct *vma = pvmw->vma;
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long address = pvmw->address;
+       unsigned long mmun_start = address & HPAGE_PMD_MASK;
+       pmd_t pmde;
+       swp_entry_t entry;
+
+       if (!(pvmw->pmd && !pvmw->pte))
+               return;
+
+       entry = pmd_to_swp_entry(*pvmw->pmd);
+       get_page(new);
+       pmde = pmd_mkold(mk_huge_pmd(new, vma->vm_page_prot));
+       if (pmd_swp_soft_dirty(*pvmw->pmd))
+               pmde = pmd_mksoft_dirty(pmde);
+       if (is_write_migration_entry(entry))
+               pmde = maybe_pmd_mkwrite(pmde, vma);
+
+       flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
+       page_add_anon_rmap(new, vma, mmun_start, true);
+       set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
+       if (vma->vm_flags & VM_LOCKED)
+               mlock_vma_page(new);
+       update_mmu_cache_pmd(vma, address, pvmw->pmd);
+}
+#endif
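
The two new helpers at the end are the encode/decode pair for the whole scheme: set_pmd_migration_entry() replaces a mapped THP's PMD with a migration entry carrying the pfn, the write permission and the soft-dirty state, and remove_migration_pmd() later rebuilds a present PMD, taking the permission and soft-dirty bits from the entry while pointing at the new page supplied by the rmap walk. The userspace sketch below round-trips those three pieces of state through a single non-present word to show that nothing is lost; the packed field layout is an assumption made up for the example, not the kernel's pmd or swap-entry format.

#include <assert.h>
#include <stdbool.h>

/* illustrative packed "migration entry": pfn plus two property bits */
struct thp_state {
        unsigned long pfn;
        bool write;
        bool soft_dirty;
};

#define ENTRY_WRITE             (1UL << 0)
#define ENTRY_SOFT_DIRTY        (1UL << 1)
#define ENTRY_PFN_SHIFT         2

/* set_pmd_migration_entry() side: fold the state into one non-present word */
static unsigned long encode_migration_entry(struct thp_state s)
{
        return (s.pfn << ENTRY_PFN_SHIFT) |
               (s.write ? ENTRY_WRITE : 0) |
               (s.soft_dirty ? ENTRY_SOFT_DIRTY : 0);
}

/* remove_migration_pmd() side: recover the state for the restored PMD */
static struct thp_state decode_migration_entry(unsigned long entry)
{
        struct thp_state s = {
                .pfn = entry >> ENTRY_PFN_SHIFT,
                .write = entry & ENTRY_WRITE,
                .soft_dirty = entry & ENTRY_SOFT_DIRTY,
        };
        return s;
}

int main(void)
{
        struct thp_state before = { .pfn = 0x51234, .write = true, .soft_dirty = true };
        struct thp_state after = decode_migration_entry(encode_migration_entry(before));

        assert(after.pfn == before.pfn);
        assert(after.write == before.write);
        assert(after.soft_dirty == before.soft_dirty);
        return 0;
}
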