1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
3 Date: Fri, 29 Sep 2017 17:08:16 +0300
4 Subject: [PATCH] mm/sparsemem: Allocate mem_section at runtime for
5 CONFIG_SPARSEMEM_EXTREME=y
7 Content-Type: text/plain; charset=UTF-8
8 Content-Transfer-Encoding: 8bit
12 The size of the mem_section[] array depends on the size of the physical address space.
14 In preparation for boot-time switching between paging modes on x86-64,
15 we need to make the allocation of mem_section[] dynamic, because otherwise
16 we waste a lot of RAM: with CONFIG_NODE_SHIFT=10, the mem_section[] size is 32kB
17 for 4-level paging and 2MB for 5-level paging mode.
19 The patch allocates the array on the first call to sparse_memory_present_with_active_regions().
21 Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
22 Cc: Andrew Morton <akpm@linux-foundation.org>
23 Cc: Andy Lutomirski <luto@amacapital.net>
24 Cc: Borislav Petkov <bp@suse.de>
25 Cc: Cyrill Gorcunov <gorcunov@openvz.org>
26 Cc: Linus Torvalds <torvalds@linux-foundation.org>
27 Cc: Peter Zijlstra <peterz@infradead.org>
28 Cc: Thomas Gleixner <tglx@linutronix.de>
29 Cc: linux-mm@kvack.org
30 Link: http://lkml.kernel.org/r/20170929140821.37654-2-kirill.shutemov@linux.intel.com
31 Signed-off-by: Ingo Molnar <mingo@kernel.org>
32 (cherry picked from commit 83e3c48729d9ebb7af5a31a504f3fd6aff0348c4)
33 Signed-off-by: Andy Whitcroft <apw@canonical.com>
34 Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
35 (cherry picked from commit c70f71e01a0ae5d884abae0424618abe90b82011)
36 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
38 include/linux/mmzone.h | 6 +++++-
39 mm/page_alloc.c | 10 ++++++++++
40 mm/sparse.c | 17 +++++++++++------
41 3 files changed, 26 insertions(+), 7 deletions(-)
43 diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
44 index fc14b8b3f6ce..9c6c001a8c6c 100644
45 --- a/include/linux/mmzone.h
46 +++ b/include/linux/mmzone.h
47 @@ -1137,13 +1137,17 @@ struct mem_section {
48 #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)
50 #ifdef CONFIG_SPARSEMEM_EXTREME
51 -extern struct mem_section *mem_section[NR_SECTION_ROOTS];
52 +extern struct mem_section **mem_section;
54 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
57 static inline struct mem_section *__nr_to_section(unsigned long nr)
59 +#ifdef CONFIG_SPARSEMEM_EXTREME
63 if (!mem_section[SECTION_NR_TO_ROOT(nr)])
65 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
66 diff --git a/mm/page_alloc.c b/mm/page_alloc.c
67 index 1423da8dd16f..66eb23ab658d 100644
70 @@ -5707,6 +5707,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
71 unsigned long start_pfn, end_pfn;
74 +#ifdef CONFIG_SPARSEMEM_EXTREME
76 + unsigned long size, align;
78 + size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
79 + align = 1 << (INTERNODE_CACHE_SHIFT);
80 + mem_section = memblock_virt_alloc(size, align);
84 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
85 memory_present(this_nid, start_pfn, end_pfn);
87 diff --git a/mm/sparse.c b/mm/sparse.c
88 index cdce7a7bb3f3..308a0789d1bb 100644
92 * 1) mem_section - memory sections, mem_map's for valid memory
94 #ifdef CONFIG_SPARSEMEM_EXTREME
95 -struct mem_section *mem_section[NR_SECTION_ROOTS]
96 - ____cacheline_internodealigned_in_smp;
97 +struct mem_section **mem_section;
99 struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
100 ____cacheline_internodealigned_in_smp;
101 @@ -104,7 +103,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
102 int __section_nr(struct mem_section* ms)
104 unsigned long root_nr;
105 - struct mem_section* root;
106 + struct mem_section *root = NULL;
108 for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
109 root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
110 @@ -115,7 +114,7 @@ int __section_nr(struct mem_section* ms)
114 - VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
117 return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
119 @@ -333,11 +332,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
120 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
122 unsigned long usemap_snr, pgdat_snr;
123 - static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
124 - static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
125 + static unsigned long old_usemap_snr;
126 + static unsigned long old_pgdat_snr;
127 struct pglist_data *pgdat = NODE_DATA(nid);
131 + if (!old_usemap_snr) {
132 + old_usemap_snr = NR_MEM_SECTIONS;
133 + old_pgdat_snr = NR_MEM_SECTIONS;
136 usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
137 pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
138 if (usemap_snr == pgdat_snr)