git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
mm/memory_hotplug: track present pages in memory groups
author David Hildenbrand <david@redhat.com>
Wed, 8 Sep 2021 02:55:30 +0000 (19:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 8 Sep 2021 18:50:23 +0000 (11:50 -0700)
Let's track all present pages in each memory group.  In particular, track
memory present in ZONE_MOVABLE and memory present in one of the kernel
zones (which really is only ZONE_NORMAL right now, as memory groups only
apply to hotplugged memory) separately within a memory group, to prepare
for making smart auto-online decisions for individual memory blocks within
a memory group based on group statistics.

Link: https://lkml.kernel.org/r/20210806124715.17090-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hui Zhu <teawater@gmail.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Marek Kedzierski <mkedzier@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
drivers/base/memory.c
include/linux/memory.h
include/linux/memory_hotplug.h
mm/memory_hotplug.c

index 16f5a3610229a886c237428e9944505d1b8604d1..a1082013e10cb8c3a7f5d7ab4d0998c85d2c8463 100644 (file)
@@ -198,7 +198,7 @@ static int memory_block_online(struct memory_block *mem)
        }
 
        ret = online_pages(start_pfn + nr_vmemmap_pages,
-                          nr_pages - nr_vmemmap_pages, zone);
+                          nr_pages - nr_vmemmap_pages, zone, mem->group);
        if (ret) {
                if (nr_vmemmap_pages)
                        mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
@@ -210,7 +210,7 @@ static int memory_block_online(struct memory_block *mem)
         * now already properly populated.
         */
        if (nr_vmemmap_pages)
-               adjust_present_page_count(pfn_to_page(start_pfn),
+               adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
                                          nr_vmemmap_pages);
 
        return ret;
@@ -228,16 +228,16 @@ static int memory_block_offline(struct memory_block *mem)
         * can properly be torn down in offline_pages().
         */
        if (nr_vmemmap_pages)
-               adjust_present_page_count(pfn_to_page(start_pfn),
+               adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
                                          -nr_vmemmap_pages);
 
        ret = offline_pages(start_pfn + nr_vmemmap_pages,
-                           nr_pages - nr_vmemmap_pages);
+                           nr_pages - nr_vmemmap_pages, mem->group);
        if (ret) {
                /* offline_pages() failed. Account back. */
                if (nr_vmemmap_pages)
                        adjust_present_page_count(pfn_to_page(start_pfn),
-                                                 nr_vmemmap_pages);
+                                                 mem->group, nr_vmemmap_pages);
                return ret;
        }
 
index d505c12c5c777e2b04c155adba897b7f50aa8fd3..6ffdc1db385fa292abba521f96c20527e6508da4 100644 (file)
  * struct memory_group - a logical group of memory blocks
  * @nid: The node id for all memory blocks inside the memory group.
  * @blocks: List of all memory blocks belonging to this memory group.
+ * @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this
+ *                       memory group.
+ * @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this
+ *                        memory group.
  * @is_dynamic: The memory group type: static vs. dynamic
  * @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum
  *              number of pages we'll have in this static memory group.
@@ -48,6 +52,8 @@
 struct memory_group {
        int nid;
        struct list_head memory_blocks;
+       unsigned long present_kernel_pages;
+       unsigned long present_movable_pages;
        bool is_dynamic;
        union {
                struct {
index 5d341978b4bc23d5fc87a9369809c133dd8a91bd..cf3f423c8a74060d9d32334709bbb3a880e51d4e 100644 (file)
@@ -12,6 +12,7 @@ struct zone;
 struct pglist_data;
 struct mem_section;
 struct memory_block;
+struct memory_group;
 struct resource;
 struct vmem_altmap;
 
@@ -100,13 +101,15 @@ static inline void zone_seqlock_init(struct zone *zone)
 extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
 extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
-extern void adjust_present_page_count(struct page *page, long nr_pages);
+extern void adjust_present_page_count(struct page *page,
+                                     struct memory_group *group,
+                                     long nr_pages);
 /* VM interface that may be used by firmware interface */
 extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
                                     struct zone *zone);
 extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
 extern int online_pages(unsigned long pfn, unsigned long nr_pages,
-                       struct zone *zone);
+                       struct zone *zone, struct memory_group *group);
 extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
                                         unsigned long end_pfn);
 extern void __offline_isolated_pages(unsigned long start_pfn,
@@ -296,7 +299,8 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
 #ifdef CONFIG_MEMORY_HOTREMOVE
 
 extern void try_offline_node(int nid);
-extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
+extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+                        struct memory_group *group);
 extern int remove_memory(u64 start, u64 size);
 extern void __remove_memory(u64 start, u64 size);
 extern int offline_and_remove_memory(u64 start, u64 size);
@@ -304,7 +308,8 @@ extern int offline_and_remove_memory(u64 start, u64 size);
 #else
 static inline void try_offline_node(int nid) {}
 
-static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+                               struct memory_group *group)
 {
        return -EINVAL;
 }
index fd57a296dd270b8856d5506bfe037e0d3809ad58..8199a4f98b2b19a0b42c3b138887715f270ce5af 100644 (file)
@@ -915,9 +915,11 @@ struct zone *zone_for_pfn_range(int online_type, int nid,
  * This function should only be called by memory_block_{online,offline},
  * and {online,offline}_pages.
  */
-void adjust_present_page_count(struct page *page, long nr_pages)
+void adjust_present_page_count(struct page *page, struct memory_group *group,
+                              long nr_pages)
 {
        struct zone *zone = page_zone(page);
+       const bool movable = zone_idx(zone) == ZONE_MOVABLE;
 
        /*
         * We only support onlining/offlining/adding/removing of complete
@@ -927,6 +929,11 @@ void adjust_present_page_count(struct page *page, long nr_pages)
                zone->present_early_pages += nr_pages;
        zone->present_pages += nr_pages;
        zone->zone_pgdat->node_present_pages += nr_pages;
+
+       if (group && movable)
+               group->present_movable_pages += nr_pages;
+       else if (group && !movable)
+               group->present_kernel_pages += nr_pages;
 }
 
 int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
@@ -972,7 +979,8 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
        kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
 }
 
-int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone)
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+                      struct zone *zone, struct memory_group *group)
 {
        unsigned long flags;
        int need_zonelists_rebuild = 0;
@@ -1025,7 +1033,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z
        }
 
        online_pages_range(pfn, nr_pages);
-       adjust_present_page_count(pfn_to_page(pfn), nr_pages);
+       adjust_present_page_count(pfn_to_page(pfn), group, nr_pages);
 
        node_states_set_node(nid, &arg);
        if (need_zonelists_rebuild)
@@ -1769,7 +1777,8 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
        return 0;
 }
 
-int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+                       struct memory_group *group)
 {
        const unsigned long end_pfn = start_pfn + nr_pages;
        unsigned long pfn, system_ram_pages = 0;
@@ -1905,7 +1914,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 
        /* removal success */
        adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
-       adjust_present_page_count(pfn_to_page(start_pfn), -nr_pages);
+       adjust_present_page_count(pfn_to_page(start_pfn), group, -nr_pages);
 
        /* reinitialise watermarks and update pcp limits */
        init_per_zone_wmark_min();