mm,thp,rmap: subpages_mapcount COMPOUND_MAPPED if PMD-mapped
author     Hugh Dickins <hughd@google.com>
           Tue, 22 Nov 2022 09:49:36 +0000 (01:49 -0800)
committer  Andrew Morton <akpm@linux-foundation.org>
           Wed, 30 Nov 2022 23:58:48 +0000 (15:58 -0800)
Can the lock_compound_mapcounts() bit_spin_lock apparatus be removed now?
Yes.  Not by atomic64_t or cmpxchg games, those get difficult on 32-bit;
but if we slightly abuse subpages_mapcount by additionally demanding that
one bit be set there when the compound page is PMD-mapped, then a cascade
of two atomic ops is able to maintain the stats without bit_spin_lock.
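
As a concrete sketch of that cascade (a userspace model, hypothetical and
simplified, mirroring the patch's names rather than its code):

    #include <stdatomic.h>

    #define COMPOUND_MAPPED 0x800000              /* set while PMD-mapped */
    #define SUBPAGES_MAPPED (COMPOUND_MAPPED - 1) /* PTE-mapped sub-page count */

    static atomic_int compound_mapcount = -1;     /* -1 encodes "not PMD-mapped" */
    static atomic_int subpages_mapcount = 0;

    static void pmd_map(void)       /* two atomic ops, no bit_spin_lock */
    {
            if (atomic_fetch_add(&compound_mapcount, 1) == -1)  /* -1 -> 0: first */
                    atomic_fetch_add(&subpages_mapcount, COMPOUND_MAPPED);
    }

    static void pmd_unmap(void)
    {
            if (atomic_fetch_sub(&compound_mapcount, 1) == 0)   /* 0 -> -1: last */
                    atomic_fetch_sub(&subpages_mapcount, COMPOUND_MAPPED);
    }

PTE map/unmap of a sub-page stays a plain atomic inc/dec of the low bits, so
one read of subpages_mapcount now answers both "how many sub-pages are mapped
by PTE" (& SUBPAGES_MAPPED) and "is it PMD-mapped" (& COMPOUND_MAPPED).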

This is harder to reason about than when bit_spin_locked, but I believe
safe; and no drift in stats detected when testing.  When there are racing
removes and adds, of course the sequence of operations is less well-
defined; but each operation on subpages_mapcount is atomically good.  What
might be disastrous, is if subpages_mapcount could ever fleetingly appear
negative: but the pte lock (or pmd lock) these rmap functions are called
under, ensures that a last remove cannot race ahead of a first add.
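
One worked interleaving (illustrative only), a PTE map racing a last PMD unmap:

    /*
     * CPU0: PTE-maps one sub-page       CPU1: last PMD unmap of the same page
     *
     * inc sub-page _mapcount: -1 -> 0
     * inc subpages_mapcount              dec compound_mapcount: 0 -> -1
     *                                    sub COMPOUND_MAPPED from subpages_mapcount
     *
     * In any interleaving, COMPOUND_MAPPED is subtracted only after the earlier
     * PMD map added it, and CPU0's increment cannot be decremented before it
     * was made: subpages_mapcount stays non-negative throughout.
     */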

Continue to make an exception for hugetlb (PageHuge) pages, though that
exception can be easily removed by a further commit if necessary: leave
subpages_mapcount 0, don't bother with COMPOUND_MAPPED in its case, just
carry on checking compound_mapcount too in folio_mapped(), page_mapped().
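
So the mapped test becomes, in outline (this is the include/linux/mm.h hunk
below, annotated):

    /* folio_large_is_mapped() after this patch */
    return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 ||  /* THP: PTE or PMD */
           atomic_read(folio_mapcount_ptr(folio)) >= 0;            /* hugetlb: compound only */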

Evidence is that this way goes slightly faster than the previous
implementation in all cases (pmds after ptes now taking around 103ms); and
relieves us of worrying about contention on the bit_spin_lock.

Link: https://lkml.kernel.org/r/3978f3ca-5473-55a7-4e14-efea5968d892@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Carpenter <error27@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Documentation/mm/transhuge.rst
include/linux/mm.h
include/linux/rmap.h
mm/page_alloc.c
mm/rmap.c

diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst
index af4c9d70321d0cd2dd856d2fc266bd51931efa4c..ec3dc5b042260be66a96277905ba108ff0e2d252 100644
--- a/Documentation/mm/transhuge.rst
+++ b/Documentation/mm/transhuge.rst
@@ -118,15 +118,14 @@ pages:
     succeeds on tail pages.
 
   - map/unmap of PMD entry for the whole compound page increment/decrement
-    ->compound_mapcount, stored in the first tail page of the compound page.
+    ->compound_mapcount, stored in the first tail page of the compound page;
+    and also increment/decrement ->subpages_mapcount (also in the first tail)
+    by COMPOUND_MAPPED when compound_mapcount goes from -1 to 0 or 0 to -1.
 
   - map/unmap of sub-pages with PTE entry increment/decrement ->_mapcount
     on relevant sub-page of the compound page, and also increment/decrement
     ->subpages_mapcount, stored in first tail page of the compound page, when
     _mapcount goes from -1 to 0 or 0 to -1: counting sub-pages mapped by PTE.
-    In order to have race-free accounting of sub-pages mapped, changes to
-    sub-page ->_mapcount, ->subpages_mapcount and ->compound_mapcount are
-    are all locked by bit_spin_lock of PG_locked in the first tail ->flags.
 
 split_huge_page internally has to distribute the refcounts in the head
 page to the tail pages before clearing all PG_head/tail bits from the page
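
A worked example of those rules (illustrative, for a 2MB THP of 512 4kB
sub-pages), mapped once by PMD and with one sub-page also mapped by PTE:

    PMD map (first):   compound_mapcount -1 -> 0, subpages_mapcount += COMPOUND_MAPPED
    PTE map sub-page:  its _mapcount -1 -> 0,     subpages_mapcount += 1
    result:            subpages_mapcount == COMPOUND_MAPPED + 1, so the page is
                       PMD-mapped and head_subpages_mapcount() reports 1
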
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 84fb91f6f56eb5f8c079d3d3ad046a83b14b750a..d33639be3db3ea899301afa9238dc8785586c4fb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -836,13 +836,22 @@ static inline int head_compound_mapcount(struct page *head)
        return atomic_read(compound_mapcount_ptr(head)) + 1;
 }
 
+/*
+ * If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages,
+ * its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit
+ * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE).  Hugetlb currently
+ * leaves subpages_mapcount at 0, but avoid surprise if it participates later.
+ */
+#define COMPOUND_MAPPED        0x800000
+#define SUBPAGES_MAPPED        (COMPOUND_MAPPED - 1)
+
 /*
  * Number of sub-pages mapped by PTE, does not include compound mapcount.
  * Must be called only on head of compound page.
  */
 static inline int head_subpages_mapcount(struct page *head)
 {
-       return atomic_read(subpages_mapcount_ptr(head));
+       return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED;
 }
 
 /*
@@ -902,8 +911,12 @@ static inline int total_mapcount(struct page *page)
 
 static inline bool folio_large_is_mapped(struct folio *folio)
 {
-       return atomic_read(folio_mapcount_ptr(folio)) +
-               atomic_read(folio_subpages_mapcount_ptr(folio)) >= 0;
+       /*
+        * Reading folio_mapcount_ptr() below could be omitted if hugetlb
+        * participated in incrementing subpages_mapcount when compound mapped.
+        */
+       return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 ||
+               atomic_read(folio_mapcount_ptr(folio)) >= 0;
 }
 
 /**
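
The headroom asserted in the new comment can be checked mechanically (a
hypothetical compile-time check, not part of the patch):

    /* 16GB of 4kB sub-pages: (16 << 30) / (4 << 10) = 0x400000 < 0x800000 */
    _Static_assert((16ULL << 30) / (4u << 10) < 0x800000 /* COMPOUND_MAPPED */,
                   "fully PTE-mapped 16GB hugetlb must not reach the flag bit");
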
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 5dadb9a3e01075a7ea92a95b1ecb8bfede0fac04..bd3504d11b15590f0e41232e234eebc1149baad4 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -204,15 +204,14 @@ void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
 void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
                unsigned long address);
 
-void page_dup_compound_rmap(struct page *page);
+static inline void __page_dup_rmap(struct page *page, bool compound)
+{
+       atomic_inc(compound ? compound_mapcount_ptr(page) : &page->_mapcount);
+}
 
 static inline void page_dup_file_rmap(struct page *page, bool compound)
 {
-       /* Is page being mapped by PTE? */
-       if (likely(!compound))
-               atomic_inc(&page->_mapcount);
-       else
-               page_dup_compound_rmap(page);
+       __page_dup_rmap(page, compound);
 }
 
 /**
@@ -261,7 +260,7 @@ static inline int page_try_dup_anon_rmap(struct page *page, bool compound,
         * the page R/O into both processes.
         */
 dup:
-       page_dup_file_rmap(page, compound);
+       __page_dup_rmap(page, compound);
        return 0;
 }
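
Callers are unchanged: fork's mm-copy path, for instance, still duplicates a
file-backed PTE mapping as below, which now boils down to one atomic_inc.
Dup never creates a page's first mapping (it copies an existing one under the
pte lock), so no first/last transition can occur and none of the
subpages_mapcount bookkeeping is needed here:

    /* illustrative caller, not changed by this patch */
    page_dup_file_rmap(page, false);    /* just atomic_inc(&page->_mapcount) */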
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0705917ddf54f70228d5434d93c7daf73b630864..c33b6963c2d76127817def53f288126e1731c370 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1330,7 +1330,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
                        bad_page(page, "nonzero compound_mapcount");
                        goto out;
                }
-               if (unlikely(head_subpages_mapcount(head_page))) {
+               if (unlikely(atomic_read(subpages_mapcount_ptr(head_page)))) {
                        bad_page(page, "nonzero subpages_mapcount");
                        goto out;
                }
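
Note the raw atomic_read() above, where head_subpages_mapcount() was used
before: that helper now masks off COMPOUND_MAPPED, but on freeing the flag
bit must be clear as well as the PTE count, so the check reads the whole
value; decomposed (annotation only, not new code), it requires:

    (atomic_read(subpages_mapcount_ptr(head_page)) & SUBPAGES_MAPPED) == 0  /* no PTE maps left */
    (atomic_read(subpages_mapcount_ptr(head_page)) & COMPOUND_MAPPED) == 0  /* no PMD map left  */
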
diff --git a/mm/rmap.c b/mm/rmap.c
index e813785da613fc7e6ddb9dc0324052dfda5f8f8e..459dc1c44d8aee94c5ec2faeaab186533475657a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1085,38 +1085,6 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
        return page_vma_mkclean_one(&pvmw);
 }
 
-struct compound_mapcounts {
-       unsigned int compound_mapcount;
-       unsigned int subpages_mapcount;
-};
-
-/*
- * lock_compound_mapcounts() first locks, then copies subpages_mapcount and
- * compound_mapcount from head[1].compound_mapcount and subpages_mapcount,
- * converting from struct page's internal representation to logical count
- * (that is, adding 1 to compound_mapcount to hide its offset by -1).
- */
-static void lock_compound_mapcounts(struct page *head,
-               struct compound_mapcounts *local)
-{
-       bit_spin_lock(PG_locked, &head[1].flags);
-       local->compound_mapcount = atomic_read(compound_mapcount_ptr(head)) + 1;
-       local->subpages_mapcount = atomic_read(subpages_mapcount_ptr(head));
-}
-
-/*
- * After caller has updated subpage._mapcount, local subpages_mapcount and
- * local compound_mapcount, as necessary, unlock_compound_mapcounts() converts
- * and copies them back to the compound head[1] fields, and then unlocks.
- */
-static void unlock_compound_mapcounts(struct page *head,
-               struct compound_mapcounts *local)
-{
-       atomic_set(compound_mapcount_ptr(head), local->compound_mapcount - 1);
-       atomic_set(subpages_mapcount_ptr(head), local->subpages_mapcount);
-       bit_spin_unlock(PG_locked, &head[1].flags);
-}
-
 int total_compound_mapcount(struct page *head)
 {
        int mapcount = head_compound_mapcount(head);
@@ -1140,34 +1108,6 @@ int total_compound_mapcount(struct page *head)
        return mapcount;
 }
 
-/*
- * page_dup_compound_rmap(), used when copying mm,
- * provides a simple example of using lock_ and unlock_compound_mapcounts().
- */
-void page_dup_compound_rmap(struct page *head)
-{
-       struct compound_mapcounts mapcounts;
-
-       /*
-        * Hugetlb pages could use lock_compound_mapcounts(), like THPs do;
-        * but at present they are still being managed by atomic operations:
-        * which are likely to be somewhat faster, so don't rush to convert
-        * them over without evaluating the effect.
-        *
-        * Note that hugetlb does not call page_add_file_rmap():
-        * here is where hugetlb shared page mapcount is raised.
-        */
-       if (PageHuge(head)) {
-               atomic_inc(compound_mapcount_ptr(head));
-       } else if (PageTransHuge(head)) {
-               /* That test is redundant: it's for safety or to optimize out */
-
-               lock_compound_mapcounts(head, &mapcounts);
-               mapcounts.compound_mapcount++;
-               unlock_compound_mapcounts(head, &mapcounts);
-       }
-}
-
 /**
  * page_move_anon_rmap - move a page to our anon_vma
  * @page:      the page to move to our anon_vma
@@ -1277,7 +1217,7 @@ static void __page_check_anon_rmap(struct page *page,
 void page_add_anon_rmap(struct page *page,
        struct vm_area_struct *vma, unsigned long address, rmap_t flags)
 {
-       struct compound_mapcounts mapcounts;
+       atomic_t *mapped;
        int nr = 0, nr_pmdmapped = 0;
        bool compound = flags & RMAP_COMPOUND;
        bool first = true;
@@ -1290,24 +1230,20 @@ void page_add_anon_rmap(struct page *page,
                first = atomic_inc_and_test(&page->_mapcount);
                nr = first;
                if (first && PageCompound(page)) {
-                       struct page *head = compound_head(page);
-
-                       lock_compound_mapcounts(head, &mapcounts);
-                       mapcounts.subpages_mapcount++;
-                       nr = !mapcounts.compound_mapcount;
-                       unlock_compound_mapcounts(head, &mapcounts);
+                       mapped = subpages_mapcount_ptr(compound_head(page));
+                       nr = atomic_inc_return_relaxed(mapped);
+                       nr = !(nr & COMPOUND_MAPPED);
                }
        } else if (PageTransHuge(page)) {
                /* That test is redundant: it's for safety or to optimize out */
 
-               lock_compound_mapcounts(page, &mapcounts);
-               first = !mapcounts.compound_mapcount;
-               mapcounts.compound_mapcount++;
+               first = atomic_inc_and_test(compound_mapcount_ptr(page));
                if (first) {
+                       mapped = subpages_mapcount_ptr(page);
+                       nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
                        nr_pmdmapped = thp_nr_pages(page);
-                       nr = nr_pmdmapped - mapcounts.subpages_mapcount;
+                       nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
                }
-               unlock_compound_mapcounts(page, &mapcounts);
        }
 
        VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
@@ -1360,6 +1296,7 @@ void page_add_new_anon_rmap(struct page *page,
                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
                /* increment count (starts at -1) */
                atomic_set(compound_mapcount_ptr(page), 0);
+               atomic_set(subpages_mapcount_ptr(page), COMPOUND_MAPPED);
                nr = thp_nr_pages(page);
                __mod_lruvec_page_state(page, NR_ANON_THPS, nr);
        }
@@ -1379,7 +1316,7 @@ void page_add_new_anon_rmap(struct page *page,
 void page_add_file_rmap(struct page *page,
        struct vm_area_struct *vma, bool compound)
 {
-       struct compound_mapcounts mapcounts;
+       atomic_t *mapped;
        int nr = 0, nr_pmdmapped = 0;
        bool first;
 
@@ -1391,24 +1328,20 @@ void page_add_file_rmap(struct page *page,
                first = atomic_inc_and_test(&page->_mapcount);
                nr = first;
                if (first && PageCompound(page)) {
-                       struct page *head = compound_head(page);
-
-                       lock_compound_mapcounts(head, &mapcounts);
-                       mapcounts.subpages_mapcount++;
-                       nr = !mapcounts.compound_mapcount;
-                       unlock_compound_mapcounts(head, &mapcounts);
+                       mapped = subpages_mapcount_ptr(compound_head(page));
+                       nr = atomic_inc_return_relaxed(mapped);
+                       nr = !(nr & COMPOUND_MAPPED);
                }
        } else if (PageTransHuge(page)) {
                /* That test is redundant: it's for safety or to optimize out */
 
-               lock_compound_mapcounts(page, &mapcounts);
-               first = !mapcounts.compound_mapcount;
-               mapcounts.compound_mapcount++;
+               first = atomic_inc_and_test(compound_mapcount_ptr(page));
                if (first) {
+                       mapped = subpages_mapcount_ptr(page);
+                       nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
                        nr_pmdmapped = thp_nr_pages(page);
-                       nr = nr_pmdmapped - mapcounts.subpages_mapcount;
+                       nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
                }
-               unlock_compound_mapcounts(page, &mapcounts);
        }
 
        if (nr_pmdmapped)
@@ -1432,7 +1365,7 @@ void page_add_file_rmap(struct page *page,
 void page_remove_rmap(struct page *page,
        struct vm_area_struct *vma, bool compound)
 {
-       struct compound_mapcounts mapcounts;
+       atomic_t *mapped;
        int nr = 0, nr_pmdmapped = 0;
        bool last;
 
@@ -1452,24 +1385,20 @@ void page_remove_rmap(struct page *page,
                last = atomic_add_negative(-1, &page->_mapcount);
                nr = last;
                if (last && PageCompound(page)) {
-                       struct page *head = compound_head(page);
-
-                       lock_compound_mapcounts(head, &mapcounts);
-                       mapcounts.subpages_mapcount--;
-                       nr = !mapcounts.compound_mapcount;
-                       unlock_compound_mapcounts(head, &mapcounts);
+                       mapped = subpages_mapcount_ptr(compound_head(page));
+                       nr = atomic_dec_return_relaxed(mapped);
+                       nr = !(nr & COMPOUND_MAPPED);
                }
        } else if (PageTransHuge(page)) {
                /* That test is redundant: it's for safety or to optimize out */
 
-               lock_compound_mapcounts(page, &mapcounts);
-               mapcounts.compound_mapcount--;
-               last = !mapcounts.compound_mapcount;
+               last = atomic_add_negative(-1, compound_mapcount_ptr(page));
                if (last) {
+                       mapped = subpages_mapcount_ptr(page);
+                       nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
                        nr_pmdmapped = thp_nr_pages(page);
-                       nr = nr_pmdmapped - mapcounts.subpages_mapcount;
+                       nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
                }
-               unlock_compound_mapcounts(page, &mapcounts);
        }
 
        if (nr_pmdmapped) {