]> git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
mm/page_alloc: scale the number of pages that are batch freed
author: Mel Gorman <mgorman@techsingularity.net>
Tue, 29 Jun 2021 02:42:18 +0000 (19:42 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 29 Jun 2021 17:53:54 +0000 (10:53 -0700)
When a task is freeing a large number of order-0 pages, it may acquire the
zone->lock multiple times while freeing pages in batches.  This may
unnecessarily contend on the zone lock when freeing a very large number of
pages.  This patch adapts the size of the batch based on the recent
pattern to scale the batch size for subsequent frees.

As the machines I used to test this were not large enough to illustrate a
problem, a debugging patch shows patterns like the following (slightly
edited for clarity):

Baseline vanilla kernel
  time-unmap-14426   [...] free_pcppages_bulk: free   63 count  378 high  378
  time-unmap-14426   [...] free_pcppages_bulk: free   63 count  378 high  378
  time-unmap-14426   [...] free_pcppages_bulk: free   63 count  378 high  378
  time-unmap-14426   [...] free_pcppages_bulk: free   63 count  378 high  378
  time-unmap-14426   [...] free_pcppages_bulk: free   63 count  378 high  378

With patches
  time-unmap-7724    [...] free_pcppages_bulk: free  126 count  814 high  814
  time-unmap-7724    [...] free_pcppages_bulk: free  252 count  814 high  814
  time-unmap-7724    [...] free_pcppages_bulk: free  504 count  814 high  814
  time-unmap-7724    [...] free_pcppages_bulk: free  751 count  814 high  814
  time-unmap-7724    [...] free_pcppages_bulk: free  751 count  814 high  814

Link: https://lkml.kernel.org/r/20210525080119.5455-5-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/mmzone.h
mm/page_alloc.c

index 7937a1d1d166871c73ba305d3b2ba03ceb4dbf1b..0a86b2890a16d6eff0bc469af59bf0228d5db95f 100644 (file)
@@ -343,8 +343,9 @@ struct per_cpu_pages {
        int count;              /* number of pages in the list */
        int high;               /* high watermark, emptying needed */
        int batch;              /* chunk size for buddy add/remove */
+       short free_factor;      /* batch scaling factor during free */
 #ifdef CONFIG_NUMA
-       int expire;             /* When 0, remote pagesets are drained */
+       short expire;           /* When 0, remote pagesets are drained */
 #endif
 
        /* Lists of pages, one per migrate type stored on the pcp-lists */
index 8d196a80382038371539bea33bf77398d1b5b469..e1d1825a2611a5bd57f20e1450bd201dc1eed7b5 100644 (file)
@@ -3278,18 +3278,47 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
        return true;
 }
 
+static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
+{
+       int min_nr_free, max_nr_free;
+
+       /* Check for PCP disabled or boot pageset */
+       if (unlikely(high < batch))
+               return 1;
+
+       /* Leave at least pcp->batch pages on the list */
+       min_nr_free = batch;
+       max_nr_free = high - batch;
+
+       /*
+        * Double the number of pages freed each time there is subsequent
+        * freeing of pages without any allocation.
+        */
+       batch <<= pcp->free_factor;
+       if (batch < max_nr_free)
+               pcp->free_factor++;
+       batch = clamp(batch, min_nr_free, max_nr_free);
+
+       return batch;
+}
+
 static void free_unref_page_commit(struct page *page, unsigned long pfn,
                                   int migratetype)
 {
        struct zone *zone = page_zone(page);
        struct per_cpu_pages *pcp;
+       int high;
 
        __count_vm_event(PGFREE);
        pcp = this_cpu_ptr(zone->per_cpu_pageset);
        list_add(&page->lru, &pcp->lists[migratetype]);
        pcp->count++;
-       if (pcp->count >= READ_ONCE(pcp->high))
-               free_pcppages_bulk(zone, READ_ONCE(pcp->batch), pcp);
+       high = READ_ONCE(pcp->high);
+       if (pcp->count >= high) {
+               int batch = READ_ONCE(pcp->batch);
+
+               free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch), pcp);
+       }
 }
 
 /*
@@ -3541,7 +3570,14 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
        unsigned long flags;
 
        local_lock_irqsave(&pagesets.lock, flags);
+
+       /*
+        * On allocation, reduce the number of pages that are batch freed.
+        * See nr_pcp_free() where free_factor is increased for subsequent
+        * frees.
+        */
        pcp = this_cpu_ptr(zone->per_cpu_pageset);
+       pcp->free_factor >>= 1;
        list = &pcp->lists[migratetype];
        page = __rmqueue_pcplist(zone,  migratetype, alloc_flags, pcp, list);
        local_unlock_irqrestore(&pagesets.lock, flags);
@@ -6737,6 +6773,7 @@ static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonesta
         */
        pcp->high = BOOT_PAGESET_HIGH;
        pcp->batch = BOOT_PAGESET_BATCH;
+       pcp->free_factor = 0;
 }
 
 static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long high,