mm: convert the rest to new page table lock api
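This patch finishes converting mm/huge_memory.c from the single per-mm mm->page_table_lock to the split PMD page-table-lock API: each spin_lock(&mm->page_table_lock) becomes ptl = pmd_lock(mm, pmd), and the matching unlock becomes spin_unlock(ptl). Callers must keep the returned spinlock_t pointer, because with split locks there is no longer one well-known lock per mm. The helpers this series adds to include/linux/mm.h look roughly like this (a sketch of the non-split fallback, not verbatim; with split PMD locks enabled the lock instead lives in the struct page backing the PMD table, so different tables can be locked independently):

static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
	return &mm->page_table_lock;	/* split case: per-PMD-table lock */
}

static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *ptl = pmd_lockptr(mm, pmd);

	spin_lock(ptl);
	return ptl;
}

The hunks below are the mechanical consequence of that API change, plus a few spots where a function must now track more than one lock.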
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c2082ab4fc9301a8173750f539cf919ba2f70d5e..bccd5a628ea6765478d2fa45dc01390a83ee8a5b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -710,6 +710,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                                        struct page *page)
 {
        pgtable_t pgtable;
+       spinlock_t *ptl;
 
        VM_BUG_ON(!PageCompound(page));
        pgtable = pte_alloc_one(mm, haddr);
@@ -724,9 +725,9 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
         */
        __SetPageUptodate(page);
 
-       spin_lock(&mm->page_table_lock);
+       ptl = pmd_lock(mm, pmd);
        if (unlikely(!pmd_none(*pmd))) {
-               spin_unlock(&mm->page_table_lock);
+               spin_unlock(ptl);
                mem_cgroup_uncharge_page(page);
                put_page(page);
                pte_free(mm, pgtable);
@@ -739,7 +740,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                set_pmd_at(mm, haddr, pmd, entry);
                add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
                atomic_long_inc(&mm->nr_ptes);
-               spin_unlock(&mm->page_table_lock);
+               spin_unlock(ptl);
        }
 
        return 0;
@@ -759,6 +760,7 @@ static inline struct page *alloc_hugepage_vma(int defrag,
                               HPAGE_PMD_ORDER, vma, haddr, nd);
 }
 
+/* Caller must hold page table lock. */
 static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
                struct page *zero_page)
@@ -790,6 +792,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                return VM_FAULT_OOM;
        if (!(flags & FAULT_FLAG_WRITE) &&
                        transparent_hugepage_use_zero_page()) {
+               spinlock_t *ptl;
                pgtable_t pgtable;
                struct page *zero_page;
                bool set;
@@ -802,10 +805,10 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        count_vm_event(THP_FAULT_FALLBACK);
                        return VM_FAULT_FALLBACK;
                }
-               spin_lock(&mm->page_table_lock);
+               ptl = pmd_lock(mm, pmd);
                set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
                                zero_page);
-               spin_unlock(&mm->page_table_lock);
+               spin_unlock(ptl);
                if (!set) {
                        pte_free(mm, pgtable);
                        put_huge_zero_page();
@@ -838,6 +841,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
                  struct vm_area_struct *vma)
 {
+       spinlock_t *dst_ptl, *src_ptl;
        struct page *src_page;
        pmd_t pmd;
        pgtable_t pgtable;
@@ -848,8 +852,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        if (unlikely(!pgtable))
                goto out;
 
-       spin_lock(&dst_mm->page_table_lock);
-       spin_lock_nested(&src_mm->page_table_lock, SINGLE_DEPTH_NESTING);
+       dst_ptl = pmd_lock(dst_mm, dst_pmd);
+       src_ptl = pmd_lockptr(src_mm, src_pmd);
+       spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 
        ret = -EAGAIN;
        pmd = *src_pmd;
@@ -858,7 +863,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                goto out_unlock;
        }
        /*
-        * mm->page_table_lock is enough to be sure that huge zero pmd is not
+        * When page table lock is held, the huge zero pmd should not be
         * under splitting since we don't split the page itself, only pmd to
         * a page table.
         */
@@ -879,8 +884,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        }
        if (unlikely(pmd_trans_splitting(pmd))) {
                /* split huge page running from under us */
-               spin_unlock(&src_mm->page_table_lock);
-               spin_unlock(&dst_mm->page_table_lock);
+               spin_unlock(src_ptl);
+               spin_unlock(dst_ptl);
                pte_free(dst_mm, pgtable);
 
                wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
@@ -900,8 +905,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
        ret = 0;
 out_unlock:
-       spin_unlock(&src_mm->page_table_lock);
-       spin_unlock(&dst_mm->page_table_lock);
+       spin_unlock(src_ptl);
+       spin_unlock(dst_ptl);
 out:
        return ret;
 }
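copy_huge_pmd() now has to juggle two PMD locks at once. The destination lock is taken with pmd_lock(); the source lock pointer is resolved with pmd_lockptr() and taken with spin_lock_nested(..., SINGLE_DEPTH_NESTING) so lockdep accepts holding two locks of the same class. A hypothetical helper condensing the idiom (not in the tree; names are illustrative, assuming the usual kernel headers):

static void lock_dst_src_pmds(struct mm_struct *dst_mm, pmd_t *dst_pmd,
			      struct mm_struct *src_mm, pmd_t *src_pmd,
			      spinlock_t **dst_ptl, spinlock_t **src_ptl)
{
	/* Destination first, then source with nesting, as in the hunk above. */
	*dst_ptl = pmd_lock(dst_mm, dst_pmd);
	*src_ptl = pmd_lockptr(src_mm, src_pmd);
	spin_lock_nested(*src_ptl, SINGLE_DEPTH_NESTING);
}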
@@ -912,10 +917,11 @@ void huge_pmd_set_accessed(struct mm_struct *mm,
                           pmd_t *pmd, pmd_t orig_pmd,
                           int dirty)
 {
+       spinlock_t *ptl;
        pmd_t entry;
        unsigned long haddr;
 
-       spin_lock(&mm->page_table_lock);
+       ptl = pmd_lock(mm, pmd);
        if (unlikely(!pmd_same(*pmd, orig_pmd)))
                goto unlock;
 
@@ -925,13 +931,14 @@ void huge_pmd_set_accessed(struct mm_struct *mm,
                update_mmu_cache_pmd(vma, address, pmd);
 
 unlock:
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
 }
 
 static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
                struct vm_area_struct *vma, unsigned long address,
                pmd_t *pmd, pmd_t orig_pmd, unsigned long haddr)
 {
+       spinlock_t *ptl;
        pgtable_t pgtable;
        pmd_t _pmd;
        struct page *page;
@@ -958,7 +965,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
        mmun_end   = haddr + HPAGE_PMD_SIZE;
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
-       spin_lock(&mm->page_table_lock);
+       ptl = pmd_lock(mm, pmd);
        if (unlikely(!pmd_same(*pmd, orig_pmd)))
                goto out_free_page;
 
@@ -985,7 +992,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
        }
        smp_wmb(); /* make pte visible before pmd */
        pmd_populate(mm, pmd, pgtable);
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
        put_huge_zero_page();
        inc_mm_counter(mm, MM_ANONPAGES);
 
@@ -995,7 +1002,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
 out:
        return ret;
 out_free_page:
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
        mem_cgroup_uncharge_page(page);
        put_page(page);
@@ -1009,6 +1016,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
                                        struct page *page,
                                        unsigned long haddr)
 {
+       spinlock_t *ptl;
        pgtable_t pgtable;
        pmd_t _pmd;
        int ret = 0, i;
@@ -1055,7 +1063,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
        mmun_end   = haddr + HPAGE_PMD_SIZE;
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
-       spin_lock(&mm->page_table_lock);
+       ptl = pmd_lock(mm, pmd);
        if (unlikely(!pmd_same(*pmd, orig_pmd)))
                goto out_free_pages;
        VM_BUG_ON(!PageHead(page));
@@ -1081,7 +1089,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
        smp_wmb(); /* make pte visible before pmd */
        pmd_populate(mm, pmd, pgtable);
        page_remove_rmap(page);
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
 
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
@@ -1092,7 +1100,7 @@ out:
        return ret;
 
 out_free_pages:
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
        mem_cgroup_uncharge_start();
        for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -1107,17 +1115,19 @@ out_free_pages:
 int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
 {
+       spinlock_t *ptl;
        int ret = 0;
        struct page *page = NULL, *new_page;
        unsigned long haddr;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
 
+       ptl = pmd_lockptr(mm, pmd);
        VM_BUG_ON(!vma->anon_vma);
        haddr = address & HPAGE_PMD_MASK;
        if (is_huge_zero_pmd(orig_pmd))
                goto alloc;
-       spin_lock(&mm->page_table_lock);
+       spin_lock(ptl);
        if (unlikely(!pmd_same(*pmd, orig_pmd)))
                goto out_unlock;
 
@@ -1133,7 +1143,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out_unlock;
        }
        get_page(page);
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
 alloc:
        if (transparent_hugepage_enabled(vma) &&
            !transparent_hugepage_debug_cow())
@@ -1180,11 +1190,11 @@ alloc:
        mmun_end   = haddr + HPAGE_PMD_SIZE;
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
-       spin_lock(&mm->page_table_lock);
+       spin_lock(ptl);
        if (page)
                put_page(page);
        if (unlikely(!pmd_same(*pmd, orig_pmd))) {
-               spin_unlock(&mm->page_table_lock);
+               spin_unlock(ptl);
                mem_cgroup_uncharge_page(new_page);
                put_page(new_page);
                goto out_mn;
@@ -1206,13 +1216,13 @@ alloc:
                }
                ret |= VM_FAULT_WRITE;
        }
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
 out_mn:
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out:
        return ret;
 out_unlock:
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
        return ret;
 }
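Note the pattern in do_huge_pmd_wp_page(): pmd_lockptr() is called once up front and the cached pointer is re-locked around the allocation path. That is safe because the lock backing a given PMD table does not move while the table exists, but every reacquisition must be followed by a pmd_same() recheck. A minimal, hypothetical sketch of that revalidation step (illustrative only):

static bool pmd_unchanged_locked(spinlock_t *ptl, pmd_t *pmd, pmd_t orig_pmd)
{
	spin_lock(ptl);
	if (likely(pmd_same(*pmd, orig_pmd)))
		return true;	/* still valid; caller unlocks later */
	spin_unlock(ptl);
	return false;		/* lost a race; caller bails out */
}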
 
@@ -1224,7 +1234,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
        struct mm_struct *mm = vma->vm_mm;
        struct page *page = NULL;
 
-       assert_spin_locked(&mm->page_table_lock);
+       assert_spin_locked(pmd_lockptr(mm, pmd));
 
        if (flags & FOLL_WRITE && !pmd_write(*pmd))
                goto out;
@@ -1271,6 +1281,7 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+       spinlock_t *ptl;
        struct anon_vma *anon_vma = NULL;
        struct page *page;
        unsigned long haddr = addr & HPAGE_PMD_MASK;
@@ -1280,7 +1291,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        bool migrated = false;
        int flags = 0;
 
-       spin_lock(&mm->page_table_lock);
+       ptl = pmd_lock(mm, pmdp);
        if (unlikely(!pmd_same(pmd, *pmdp)))
                goto out_unlock;
 
@@ -1318,7 +1329,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 * relock and check_same as the page may no longer be mapped.
                 * As the fault is being retried, do not account for it.
                 */
-               spin_unlock(&mm->page_table_lock);
+               spin_unlock(ptl);
                wait_on_page_locked(page);
                page_nid = -1;
                goto out;
@@ -1326,13 +1337,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
        /* Page is misplaced, serialise migrations and parallel THP splits */
        get_page(page);
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
        if (!page_locked)
                lock_page(page);
        anon_vma = page_lock_anon_vma_read(page);
 
        /* Confirm the PMD did not change while page_table_lock was released */
-       spin_lock(&mm->page_table_lock);
+       spin_lock(ptl);
        if (unlikely(!pmd_same(pmd, *pmdp))) {
                unlock_page(page);
                put_page(page);
@@ -1344,7 +1355,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         * Migrate the THP to the requested node, returns with page unlocked
         * and pmd_numa cleared.
         */
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
        migrated = migrate_misplaced_transhuge_page(mm, vma,
                                pmdp, pmd, addr, page, target_nid);
        if (migrated) {
@@ -1361,7 +1372,7 @@ clear_pmdnuma:
        update_mmu_cache_pmd(vma, addr, pmdp);
        unlock_page(page);
 out_unlock:
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
 
 out:
        if (anon_vma)
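The NUMA fault path shows why the returned ptl must be dropped before anything that can sleep: lock_page(), page_lock_anon_vma_read() and page migration may all block, and a spinlock cannot be held across them. The page is therefore pinned first, the lock is dropped, and after the sleeping work the lock is retaken with a pmd_same() recheck. Condensed into a hypothetical helper (illustrative only, assuming <linux/mm.h> and <linux/pagemap.h>):

static bool pin_and_serialise(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd,
			      struct page *page, spinlock_t *ptl)
{
	get_page(page);			/* pin across the unlocked window */
	spin_unlock(ptl);
	lock_page(page);		/* may sleep */
	spin_lock(ptl);
	if (unlikely(!pmd_same(pmd, *pmdp))) {
		unlock_page(page);
		put_page(page);
		return false;		/* PMD changed while unlocked */
	}
	return true;
}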
@@ -2371,7 +2382,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        pte_t *pte;
        pgtable_t pgtable;
        struct page *new_page;
-       spinlock_t *ptl;
+       spinlock_t *pmd_ptl, *pte_ptl;
        int isolated;
        unsigned long hstart, hend;
        unsigned long mmun_start;       /* For mmu_notifiers */
@@ -2414,12 +2425,12 @@ static void collapse_huge_page(struct mm_struct *mm,
        anon_vma_lock_write(vma->anon_vma);
 
        pte = pte_offset_map(pmd, address);
-       ptl = pte_lockptr(mm, pmd);
+       pte_ptl = pte_lockptr(mm, pmd);
 
        mmun_start = address;
        mmun_end   = address + HPAGE_PMD_SIZE;
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-       spin_lock(&mm->page_table_lock); /* probably unnecessary */
+       pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
        /*
         * After this gup_fast can't run anymore. This also removes
         * any huge TLB entry from the CPU so we won't allow
@@ -2427,16 +2438,16 @@ static void collapse_huge_page(struct mm_struct *mm,
         * to avoid the risk of CPU bugs in that area.
         */
        _pmd = pmdp_clear_flush(vma, address, pmd);
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(pmd_ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
-       spin_lock(ptl);
+       spin_lock(pte_ptl);
        isolated = __collapse_huge_page_isolate(vma, address, pte);
-       spin_unlock(ptl);
+       spin_unlock(pte_ptl);
 
        if (unlikely(!isolated)) {
                pte_unmap(pte);
-               spin_lock(&mm->page_table_lock);
+               spin_lock(pmd_ptl);
                BUG_ON(!pmd_none(*pmd));
                /*
                 * We can only use set_pmd_at when establishing
@@ -2444,7 +2455,7 @@ static void collapse_huge_page(struct mm_struct *mm,
                 * points to regular pagetables. Use pmd_populate for that
                 */
                pmd_populate(mm, pmd, pmd_pgtable(_pmd));
-               spin_unlock(&mm->page_table_lock);
+               spin_unlock(pmd_ptl);
                anon_vma_unlock_write(vma->anon_vma);
                goto out;
        }
@@ -2455,7 +2466,7 @@ static void collapse_huge_page(struct mm_struct *mm,
         */
        anon_vma_unlock_write(vma->anon_vma);
 
-       __collapse_huge_page_copy(pte, new_page, vma, address, ptl);
+       __collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl);
        pte_unmap(pte);
        __SetPageUptodate(new_page);
        pgtable = pmd_pgtable(_pmd);
@@ -2470,13 +2481,13 @@ static void collapse_huge_page(struct mm_struct *mm,
         */
        smp_wmb();
 
-       spin_lock(&mm->page_table_lock);
+       spin_lock(pmd_ptl);
        BUG_ON(!pmd_none(*pmd));
        page_add_new_anon_rmap(new_page, vma, address);
        pgtable_trans_huge_deposit(mm, pmd, pgtable);
        set_pmd_at(mm, address, pmd, _pmd);
        update_mmu_cache_pmd(vma, address, pmd);
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(pmd_ptl);
 
        *hpage = NULL;
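The rename from plain ptl to pmd_ptl/pte_ptl in collapse_huge_page() is the point of these hunks: with split page table locks, the lock guarding the PMD entry and the lock guarding the PTE table it points to are distinct objects, and khugepaged must take the right one at each step. A small sketch of the distinction (hypothetical helper; pmd_lockptr() and pte_lockptr() are the real accessors):

static bool collapse_locks_are_shared(struct mm_struct *mm, pmd_t *pmd)
{
	/* True only when split page table locks are disabled. */
	return pmd_lockptr(mm, pmd) == pte_lockptr(mm, pmd);
}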
 
@@ -2805,6 +2816,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
                pmd_t *pmd)
 {
+       spinlock_t *ptl;
        struct page *page;
        struct mm_struct *mm = vma->vm_mm;
        unsigned long haddr = address & HPAGE_PMD_MASK;
@@ -2817,22 +2829,22 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
        mmun_end   = haddr + HPAGE_PMD_SIZE;
 again:
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-       spin_lock(&mm->page_table_lock);
+       ptl = pmd_lock(mm, pmd);
        if (unlikely(!pmd_trans_huge(*pmd))) {
-               spin_unlock(&mm->page_table_lock);
+               spin_unlock(ptl);
                mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
                return;
        }
        if (is_huge_zero_pmd(*pmd)) {
                __split_huge_zero_page_pmd(vma, haddr, pmd);
-               spin_unlock(&mm->page_table_lock);
+               spin_unlock(ptl);
                mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
                return;
        }
        page = pmd_page(*pmd);
        VM_BUG_ON(!page_count(page));
        get_page(page);
-       spin_unlock(&mm->page_table_lock);
+       spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
        split_huge_page(page);
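The split path follows the same discipline as the fault paths: validate the PMD under its lock, pin the page, and drop the lock before calling split_huge_page(), which may sleep; the again: label retries if the PMD changed in the meantime. A condensed, hypothetical sketch of the locked portion (illustrative only):

static struct page *get_trans_huge_page(struct mm_struct *mm, pmd_t *pmd)
{
	struct page *page = NULL;
	spinlock_t *ptl;

	ptl = pmd_lock(mm, pmd);
	if (pmd_trans_huge(*pmd) && !is_huge_zero_pmd(*pmd)) {
		page = pmd_page(*pmd);
		get_page(page);		/* pin before dropping the lock */
	}
	spin_unlock(ptl);
	return page;
}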